desiru 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,889 @@
+ # frozen_string_literal: true
+
+ require 'json'
+ require 'set' # to_set is used in example_distance and is not auto-loaded before Ruby 3.2
+
+ module Desiru
+   module Optimizers
+     # MIPROv2 - Multi-objective Instruction Prompt Optimization v2
+     # Uses Bayesian optimization to tune prompts and demonstrations across multiple objectives
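+     #
+     # Illustrative usage (a sketch; assumes an existing Desiru program plus
+     # train/validation example sets):
+     #
+     #   optimizer = Desiru::Optimizers::MIPROv2.new(metric: :exact_match)
+     #   optimized = optimizer.compile(program, trainset: train, valset: val)
+     #   optimizer.optimization_history.last[:scores] # per-objective scores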
+     class MIPROv2 < Base
+       attr_reader :optimization_history, :pareto_frontier, :trace_collector
+
+       def initialize(metric: :exact_match, objectives: nil, **config)
+         super(metric: metric, **config)
+         @objectives = normalize_objectives(objectives || [metric])
+         @optimization_history = []
+         @pareto_frontier = []
+         @gaussian_process = GaussianProcess.new
+         @acquisition_function = config[:acquisition_function] || :expected_improvement
+         @trace_collector = config[:trace_collector] || Core.trace_collector
+         @instruction_candidates = []
+         @demonstration_candidates = []
+       end
+
+       def compile(program, trainset:, valset: nil)
+         trace_optimization('Starting MIPROv2 optimization', {
+           trainset_size: trainset.size,
+           valset_size: valset&.size || 0,
+           objectives: @objectives.map(&:to_s),
+           config: config
+         })
+
+         # Initialize optimization state
+         @current_program = deep_copy_program(program)
+         @trainset = trainset
+         @valset = valset || trainset
+         @iteration = 0
+
+         # Clear trace collector for fresh optimization
+         @trace_collector.clear if config[:clear_traces]
+
+         # Enable tracing on all modules
+         enable_program_tracing(@current_program)
+
+         # Run Bayesian optimization loop
+         while @iteration < config[:max_iterations] && !should_stop?
+           @iteration += 1
+           trace_optimization("Iteration #{@iteration}", { phase: 'start' })
+
+           # Generate candidates using acquisition function
+           candidates = generate_candidates
+
+           # Evaluate candidates
+           evaluated_candidates = evaluate_candidates(candidates)
+
+           # Update Gaussian Process with results
+           update_gaussian_process(evaluated_candidates)
+
+           # Update Pareto frontier for multi-objective optimization
+           update_pareto_frontier(evaluated_candidates)
+
+           # Select best candidate
+           best_candidate = select_best_candidate(evaluated_candidates)
+
+           # Apply best candidate to program
+           apply_candidate(@current_program, best_candidate) if best_candidate
+
+           # Log iteration results, falling back to the first candidate when none was selected
+           if best_candidate
+             log_iteration_results(best_candidate, evaluated_candidates)
+           elsif evaluated_candidates.any?
+             log_iteration_results(evaluated_candidates.first, evaluated_candidates)
+           end
+         end
+
+         # Return optimized program
+         @current_program
+       rescue StandardError => e
+         trace_optimization('Optimization failed', { error: e.message, backtrace: e.backtrace.first(3) })
+         # Return original program on error
+         program
+       ensure
+         # Always restore trace state, even when optimization failed
+         begin
+           disable_program_tracing(@current_program) if config[:restore_trace_state]
+         rescue StandardError
+           nil
+         end
+       end
+
+       def optimize_module(module_instance, examples)
+         trace_optimization('Optimizing module with MIPROv2', {
+           module: module_instance.class.name,
+           examples_count: examples.size
+         })
+
+         # Generate instruction variants
+         instruction_variants = generate_instruction_variants(module_instance, examples)
+
+         # Generate demonstration sets
+         demo_sets = generate_demonstration_sets(module_instance, examples)
+
+         # Evaluate all combinations
+         best_config = nil
+         best_score = -Float::INFINITY
+
+         instruction_variants.each do |instruction|
+           demo_sets.each do |demos|
+             score = evaluate_module_config(module_instance, instruction, demos, examples)
+
+             if score > best_score
+               best_score = score
+               best_config = { instruction: instruction, demos: demos }
+             end
+           end
+         end
+
+         # Create optimized module
+         optimized = module_instance.with_demos(best_config[:demos])
+         optimized.instruction = best_config[:instruction] if optimized.respond_to?(:instruction=)
+
+         optimized
+       end
+
+       def generate_instruction_variants(module_instance, _examples)
+         # Generate different instruction styles
+         signature = module_instance.signature
+         [
+           generate_instruction(signature, 'concise', 0.2),
+           generate_instruction(signature, 'detailed', 0.5),
+           generate_instruction(signature, 'step-by-step', 0.8)
+         ]
+       end
+
+       def generate_demonstration_sets(_module_instance, examples)
+         return [[]] if examples.empty?
+
+         # Generate different demo sets
+         sets = []
+
+         # Empty set
+         sets << []
+
+         # Random subsets
+         [1, 2, 3].each do |count|
+           break if count > examples.size
+
+           sets << examples.sample(count)
+         end
+
+         # Diverse set
+         sets << select_diverse_demonstrations(examples, [examples.size, 3].min, Random.new) if examples.size > 1
+
+         sets
+       end
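+       # generate_demonstration_sets with three examples returns: the empty set,
+       # random subsets of sizes 1..3, and one diversity-selected set of size 3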
+
+       def evaluate_module_config(module_instance, instruction, demos, examples)
+         # Simple evaluation - could be enhanced
+         test_module = module_instance.with_demos(demos)
+
+         test_module.instruction = instruction if test_module.respond_to?(:instruction=) && instruction
+
+         # Evaluate on subset of examples
+         eval_examples = examples.sample([examples.size, 5].min)
+         scores = eval_examples.map do |ex|
+           # Extract inputs (exclude answer/output fields)
+           inputs = {}
+           ex.to_h.each do |k, v|
+             inputs[k] = v unless %i[answer output].include?(k)
+           end
+
+           result = test_module.call(inputs)
+           score_prediction(result, ex)
+         rescue StandardError
+           0.0
+         end
+
+         scores.empty? ? 0.0 : scores.sum.to_f / scores.size
+       end
+
+       private
+
+       def normalize_objectives(objectives)
+         objectives.map { |obj| normalize_metric(obj) }
+       end
+
+       def generate_candidates
+         trace_optimization('Generating candidates', {
+           iteration: @iteration,
+           acquisition_function: @acquisition_function
+         })
+
+         # Use Gaussian Process to guide candidate generation
+         if @optimization_history.empty?
+           # Initial random sampling
+           generate_random_candidates(config[:num_candidates])
+         else
+           # Use acquisition function to generate candidates
+           generate_guided_candidates(config[:num_candidates])
+         end
+       end
+
+       def generate_random_candidates(num)
+         (1..num).map do |i|
+           {
+             id: "random_#{@iteration}_#{i}",
+             instruction_seed: rand,
+             demo_seed: rand,
+             temperature: 0.1 + (rand * 0.8),
+             demo_count: rand(1..config[:max_bootstrapped_demos]),
+             instruction_style: %w[concise detailed step-by-step].sample,
+             demo_selection: %w[random diverse similar].sample
+           }
+         end
+       end
+
+       def generate_guided_candidates(num)
+         candidates = []
+
+         # Get best performers from history
+         best_historical = @optimization_history
+                           .sort_by { |h| -h[:scores].values.sum }
+                           .first(5)
+
+         # Generate variations of best performers. History entries store the
+         # winning candidate under :best_candidate (see log_iteration_results).
+         best_historical.each do |hist|
+           next unless hist[:best_candidate] # Skip if no candidate recorded
+
+           2.times do
+             candidate = mutate_candidate(hist[:best_candidate])
+             candidates << candidate
+           end
+         end
+
+         # Fill remaining slots with acquisition function-guided candidates
+         while candidates.size < num
+           candidate = generate_acquisition_candidate
+           candidates << candidate
+         end
+
+         candidates.first(num)
+       end
+
+       def mutate_candidate(base_candidate)
+         return generate_random_candidates(1).first if base_candidate.nil?
+
+         mutated = base_candidate.dup
+         mutated[:id] = "mutated_#{@iteration}_#{rand(1000)}"
+
+         # Mutate parameters with small variations
+         mutated[:instruction_seed] = constrain((base_candidate[:instruction_seed] || rand) + gaussian_noise(0.1), 0, 1)
+         mutated[:demo_seed] = constrain((base_candidate[:demo_seed] || rand) + gaussian_noise(0.1), 0, 1)
+         mutated[:temperature] = constrain((base_candidate[:temperature] || 0.5) + gaussian_noise(0.05), 0.1, 0.9)
+         mutated[:demo_count] = constrain(
+           (base_candidate[:demo_count] || 2) + gaussian_noise(0.5).round,
+           1,
+           config[:max_bootstrapped_demos]
+         )
+
+         mutated
+       end
+
+       def generate_acquisition_candidate
+         # Use acquisition function to find promising regions
+         best_point = optimize_acquisition_function
+
+         {
+           id: "acquisition_#{@iteration}_#{rand(1000)}",
+           instruction_seed: best_point[0],
+           demo_seed: best_point[1],
+           temperature: best_point[2],
+           demo_count: best_point[3].round.clamp(1, config[:max_bootstrapped_demos]),
+           instruction_style: select_instruction_style(best_point[0]),
+           demo_selection: select_demo_strategy(best_point[1])
+         }
+       end
+
+       def evaluate_candidates(candidates)
+         trace_optimization("Evaluating #{candidates.size} candidates", {})
+
+         candidates.map do |candidate|
+           # Validate candidate has required fields
+           next unless candidate.is_a?(Hash) && candidate[:id]
+
+           # Apply candidate configuration to program
+           test_program = deep_copy_program(@current_program)
+           apply_candidate(test_program, candidate)
+
+           # Evaluate on validation set
+           scores = evaluate_multi_objective(test_program, @valset)
+
+           # Collect traces for this candidate
+           candidate_traces = collect_candidate_traces(candidate[:id])
+
+           {
+             candidate: candidate,
+             scores: scores,
+             traces: candidate_traces,
+             timestamp: Time.now
+           }
+         rescue StandardError => e
+           trace_optimization('Candidate evaluation failed', {
+             candidate_id: candidate[:id] || 'unknown',
+             error: e.message
+           })
+           {
+             candidate: candidate,
+             scores: {},
+             traces: [],
+             timestamp: Time.now,
+             error: e.message
+           }
+         end.compact
+       end
+
+       def evaluate_multi_objective(program, dataset)
+         scores = {}
+
+         @objectives.each do |objective|
+           evaluator = create_evaluator(objective)
+           result = evaluator.evaluate(program, dataset)
+           scores[objective] = result[:average_score]
+         end
+
+         scores
+       end
+
+       def create_evaluator(objective)
+         # Create a temporary evaluator for each objective
+         self.class.superclass.new(metric: objective, config: config)
+       end
+
+       def update_gaussian_process(evaluated_candidates)
+         # Convert candidates to feature vectors
+         evaluated_candidates.each do |evaluation|
+           features = candidate_to_features(evaluation[:candidate])
+           # For multi-objective, use scalarized score
+           score = scalarize_objectives(evaluation[:scores])
+           @gaussian_process.add_observation(features, score)
+         end
+
+         @gaussian_process.update
+       end
+
+       def candidate_to_features(candidate)
+         [
+           candidate[:instruction_seed],
+           candidate[:demo_seed],
+           candidate[:temperature],
+           candidate[:demo_count].to_f / config[:max_bootstrapped_demos]
+         ]
+       end
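+       # e.g. { instruction_seed: 0.4, demo_seed: 0.9, temperature: 0.55, demo_count: 3 }
+       # becomes [0.4, 0.9, 0.55, 1.0] when config[:max_bootstrapped_demos] is 3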
+
+       def scalarize_objectives(scores)
+         # Simple weighted sum - could be improved with user preferences
+         weights = @objectives.map { 1.0 / @objectives.size }
+         scores.values.zip(weights).map { |s, w| (s || 0) * w }.sum
+       end
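+       # e.g. for objectives [:exact_match, :f1] with scores { exact_match: 0.8, f1: 0.6 },
+       # equal weights of 0.5 give (0.8 * 0.5) + (0.6 * 0.5) = 0.7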
+
+       def update_pareto_frontier(evaluated_candidates)
+         # Add new candidates to frontier
+         evaluated_candidates.each do |evaluation|
+           @pareto_frontier << evaluation
+         end
+
+         # Remove dominated solutions
+         @pareto_frontier = compute_pareto_frontier(@pareto_frontier)
+
+         trace_optimization('Updated Pareto frontier', {
+           size: @pareto_frontier.size,
+           best_scores: @pareto_frontier.first(3).map { |e| e[:scores] }
+         })
+       end
+
+       def compute_pareto_frontier(candidates)
+         frontier = []
+
+         candidates.each do |candidate|
+           dominated = false
+
+           candidates.each do |other|
+             next if candidate == other
+
+             if dominates?(other[:scores], candidate[:scores])
+               dominated = true
+               break
+             end
+           end
+
+           frontier << candidate unless dominated
+         end
+
+         frontier
+       end
+
+       def dominates?(scores1, scores2)
+         # Assumes maximization on every objective; for minimization objectives
+         # the comparison would need to be flipped
+         at_least_one_better = false
+
+         @objectives.each do |obj|
+           # Handle nil scores
+           score1 = scores1[obj] || 0
+           score2 = scores2[obj] || 0
+
+           return false if score1 < score2
+
+           at_least_one_better = true if score1 > score2
+         end
+
+         at_least_one_better
+       end
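+       # e.g. { a: 0.9, b: 0.5 } dominates { a: 0.8, b: 0.5 } (no worse anywhere,
+       # strictly better on :a), while { a: 0.9, b: 0.4 } and { a: 0.8, b: 0.5 }
+       # are mutually non-dominated and would both remain on the frontier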
+
+       def select_best_candidate(evaluated_candidates)
+         return nil if evaluated_candidates.empty?
+
+         # Filter out candidates with nil scores
+         valid_candidates = evaluated_candidates.reject { |c| c[:scores].nil? || c[:scores].empty? }
+         return nil if valid_candidates.empty?
+
+         # For single objective, pick best
+         if @objectives.size == 1
+           valid_candidates.max_by { |e| e[:scores][@objectives.first] || 0 }
+         else
+           # For multi-objective, pick from the Pareto frontier based on preferences,
+           # filtering out frontier entries with missing scores
+           valid_frontier = @pareto_frontier.reject { |c| c[:scores].nil? || c[:scores].empty? }
+           return nil if valid_frontier.empty?
+
+           valid_frontier.max_by { |e| scalarize_objectives(e[:scores]) }
+         end
+       end
+
+       def apply_candidate(program, candidate)
+         return unless candidate
+
+         # Apply instruction modifications
+         apply_instruction_changes(program, candidate)
+
+         # Apply demonstration selection
+         apply_demonstration_changes(program, candidate)
+
+         # Store candidate configuration in program metadata
+         # (guard on the reader actually called below)
+         return unless program.respond_to?(:metadata)
+
+         program.metadata[:mipro_config] = candidate
+       end
+
+       def apply_instruction_changes(program, candidate)
+         modules = extract_program_modules(program)
+
+         modules.each_value do |mod|
+           next unless mod.respond_to?(:signature)
+
+           # Generate instruction based on candidate parameters
+           instruction = generate_instruction(
+             mod.signature,
+             candidate[:instruction_style],
+             candidate[:instruction_seed]
+           )
+
+           # Apply if module supports custom instructions
+           mod.instruction = instruction if mod.respond_to?(:instruction=)
+         end
+       end
+
+       def apply_demonstration_changes(program, candidate)
+         modules = extract_program_modules(program)
+
+         modules.each_value do |mod|
+           # Select demonstrations based on candidate strategy
+           demos = select_demonstrations(
+             mod,
+             @trainset,
+             candidate[:demo_count],
+             candidate[:demo_selection],
+             candidate[:demo_seed]
+           )
+
+           # Apply demonstrations
+           optimized_module = mod.with_demos(demos)
+           update_program_module(program, mod, optimized_module)
+         end
+       end
+
+       def generate_instruction(signature, style, _seed)
+         # NOTE: a seed is accepted for reproducibility, but the templates below
+         # are deterministic per style, so no RNG is needed here
+
+         # Handle both string and Signature object
+         if signature.is_a?(String)
+           # Parse signature string to extract input/output fields
+           parts = signature.split('->').map(&:strip)
+           return signature unless parts.size == 2
+
+           input_fields = parts[0].split(',').map(&:strip).map { |f| f.split(':').first.strip }
+           output_fields = parts[1].split(',').map(&:strip).map { |f| f.split(':').first.strip }
+         else
+           # It's a Signature object
+           input_fields = signature.input_fields.keys
+           output_fields = signature.output_fields.keys
+         end
+
+         base_instruction = signature.to_s
+         style ||= 'concise' # Default style if nil
+
+         case style
+         when 'concise'
+           "Given #{input_fields.join(', ')}, output #{output_fields.join(', ')}."
+         when 'detailed'
+           if signature.is_a?(String)
+             "Process the following inputs: #{input_fields.join(', ')}. " \
+               "Generate these outputs: #{output_fields.join(', ')}. Be thorough and accurate."
+           else
+             input_desc = signature.input_fields.map { |k, f| "#{k} (#{f.type})" }.join(', ')
+             output_desc = signature.output_fields.map { |k, f| "#{k} (#{f.type})" }.join(', ')
+             "Process the following inputs: #{input_desc}. " \
+               "Generate these outputs: #{output_desc}. Be thorough and accurate."
+           end
+         when 'step-by-step'
+           "Follow these steps:\n" \
+             "1. Analyze the inputs: #{input_fields.join(', ')}\n" \
+             "2. Process the information carefully\n" \
+             "3. Generate outputs: #{output_fields.join(', ')}"
+         else
+           base_instruction
+         end
+       end
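+       # e.g. a 'question -> answer' string signature with style 'concise'
+       # produces "Given question, output answer."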
+
+       def select_demonstrations(module_instance, examples, count, strategy, seed)
+         count ||= 0 # Default count if nil
+         return [] if count.zero? || examples.empty?
+
+         # Use seed for reproducibility
+         seed ||= rand # Fallback if seed is nil
+         rng = Random.new((seed * 1_000_000).to_i)
+         available = examples.dup
+
+         case strategy
+         when 'random'
+           available.sample(count, random: rng)
+         when 'diverse'
+           select_diverse_demonstrations(available, count, rng)
+         when 'similar'
+           select_similar_demonstrations(module_instance, available, count, rng)
+         else
+           available.first(count)
+         end
+       end
+
+       def select_diverse_demonstrations(examples, count, rng)
+         selected = []
+         remaining = examples.shuffle(random: rng)
+
+         while selected.size < count && remaining.any?
+           # Add most different from current selection
+           best_candidate = remaining.max_by do |ex|
+             min_distance_to_selected(ex, selected)
+           end
+
+           selected << best_candidate
+           remaining.delete(best_candidate)
+         end
+
+         selected
+       end
+
+       def select_similar_demonstrations(_module_instance, examples, count, rng)
+         # Group by similarity and select representatives
+         clusters = cluster_examples(examples, count)
+         clusters.map { |cluster| cluster.sample(random: rng) }.compact.first(count)
+       end
+
+       def min_distance_to_selected(example, selected)
+         return Float::INFINITY if selected.empty?
+
+         selected.map { |sel| example_distance(example, sel) }.min
+       end
+
+       def example_distance(ex1, ex2)
+         # Simple distance based on shared keys and values
+         keys1 = ex1.keys.to_set
+         keys2 = ex2.keys.to_set
+
+         shared_keys = keys1 & keys2
+         return 1.0 if shared_keys.empty?
+
+         differences = shared_keys.count { |k| ex1[k] != ex2[k] }
+         differences.to_f / shared_keys.size
+       end
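+       # e.g. { question: 'a', answer: 'x' } vs { question: 'a', answer: 'y' }
+       # share two keys and differ on one, giving a distance of 1.0 / 2 = 0.5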
+
+       def cluster_examples(examples, num_clusters)
+         # Simple round-robin partitioning - could be improved with k-means
+         return [examples] if num_clusters == 1
+
+         clusters = Array.new(num_clusters) { [] }
+         examples.each_with_index do |ex, i|
+           clusters[i % num_clusters] << ex
+         end
+
+         clusters.reject(&:empty?)
+       end
+
+       def collect_candidate_traces(candidate_id)
+         # Filter traces that occurred during this candidate's evaluation
+         @trace_collector.traces.select do |trace|
+           trace.metadata[:candidate_id] == candidate_id
+         end
+       end
+
+       def log_iteration_results(best_candidate, all_candidates)
+         @optimization_history << {
+           iteration: @iteration,
+           best_candidate: best_candidate[:candidate],
+           scores: best_candidate[:scores] || {},
+           all_scores: all_candidates.map { |c| c[:scores] || {} },
+           pareto_size: @pareto_frontier.size,
+           timestamp: Time.now
+         }
+
+         trace_optimization("Iteration #{@iteration} complete", {
+           best_scores: best_candidate[:scores] || {},
+           candidates_evaluated: all_candidates.size,
+           traces_collected: @trace_collector.size
+         })
+       end
+
+       def should_stop?
+         return true if @iteration >= config[:max_iterations]
+
+         # Check if we've reached target performance (when a target is configured)
+         if config[:stop_at_score] && @optimization_history.any?
+           best_score = @optimization_history.last[:scores].values.max
+           return true if best_score && best_score >= config[:stop_at_score]
+         end
+
+         # Check for convergence
+         if @optimization_history.size >= 5
+           recent_scores = @optimization_history.last(5).map { |h| h[:scores].values.max || 0.0 }
+           variance = statistical_variance(recent_scores)
+           return true if variance < config[:convergence_threshold]
+         end
+
+         false
+       end
+
+       def statistical_variance(values)
+         mean = values.sum.to_f / values.size
+         values.map { |v| (v - mean)**2 }.sum / values.size
+       end
+
+       def deep_copy_program(program)
+         # This needs proper implementation based on program structure
+         # For now, just return the program as optimizers typically create new modules
+         program
+       end
+
+       def extract_program_modules(program)
+         modules = {}
+
+         # Check instance variables
+         program.instance_variables.each do |var|
+           value = program.instance_variable_get(var)
+           modules[var.to_s.delete('@').to_sym] = value if value.is_a?(Desiru::Module)
+         end
+
+         # Check if program has a modules method
+         if program.respond_to?(:modules)
+           program.modules.each do |name, mod|
+             modules[name] = mod if mod.is_a?(Desiru::Module)
+           end
+         end
+
+         modules
+       end
+
+       def update_program_module(program, old_module, new_module)
+         # Update instance variable if it matches
+         program.instance_variables.each do |var|
+           value = program.instance_variable_get(var)
+           program.instance_variable_set(var, new_module) if value == old_module
+         end
+
+         # Update in modules hash if program supports it
+         return unless program.respond_to?(:modules) && program.modules.is_a?(Hash)
+
+         program.modules.each do |name, mod|
+           program.modules[name] = new_module if mod == old_module
+         end
+       end
+
+       def enable_program_tracing(program)
+         modules = extract_program_modules(program)
+         modules.each_value do |mod|
+           mod.enable_trace! if mod.respond_to?(:enable_trace!)
+         end
+       end
+
+       def disable_program_tracing(program)
+         modules = extract_program_modules(program)
+         modules.each_value do |mod|
+           mod.disable_trace! if mod.respond_to?(:disable_trace!)
+         end
+       end
+
+       def optimize_acquisition_function
+         # Simple random search over 10 points - could be improved with
+         # gradient-based optimization
+         best_point = nil
+         best_value = -Float::INFINITY
+
+         10.times do
+           point = [rand, rand, 0.1 + (rand * 0.8), rand * config[:max_bootstrapped_demos]]
+           value = compute_acquisition_value(point)
+
+           if value > best_value
+             best_value = value
+             best_point = point
+           end
+         end
+
+         best_point
+       end
+
+       def compute_acquisition_value(point)
+         case @acquisition_function
+         when :expected_improvement
+           expected_improvement(point)
+         when :upper_confidence_bound
+           upper_confidence_bound(point)
+         else
+           @gaussian_process.predict(point)[:mean]
+         end
+       end
+
+       def expected_improvement(point)
+         prediction = @gaussian_process.predict(point)
+         mean = prediction[:mean]
+         std = prediction[:std]
+
+         return 0.0 if std.zero?
+
+         best_so_far = @optimization_history.map { |h| scalarize_objectives(h[:scores]) }.max || 0
+         z = (mean - best_so_far) / std
+
+         # EI = (mean - best) * CDF(z) + std * PDF(z)
+         ((mean - best_so_far) * standard_normal_cdf(z)) + (std * standard_normal_pdf(z))
+       rescue StandardError => e
+         trace_optimization('Expected improvement calculation failed', { error: e.message })
+         0.0 # Return 0 on error
+       end
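+       # Worked example: mean = 0.7, best_so_far = 0.6, std = 0.1 gives z = 1.0,
+       # so EI = (0.1 * 0.8413) + (0.1 * 0.2420) ≈ 0.108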
+
+       def upper_confidence_bound(point, beta = 2.0)
+         prediction = @gaussian_process.predict(point)
+         prediction[:mean] + (beta * prediction[:std])
+       end
+
+       def standard_normal_pdf(value)
+         Math.exp(-0.5 * (value**2)) / Math.sqrt(2 * Math::PI)
+       end
+
+       def standard_normal_cdf(value)
+         0.5 * (1 + Math.erf(value / Math.sqrt(2)))
+       end
+
+       def gaussian_noise(std_dev)
+         # Box-Muller transform for Gaussian noise
+         u1 = 1.0 - rand # rand may return 0.0; shift into (0, 1] so Math.log stays finite
+         u2 = rand
+         Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math::PI * u2) * std_dev
+       end
+
+       def constrain(value, min, max)
+         value.clamp(min, max)
+       end
+
+       def select_instruction_style(seed)
+         styles = %w[concise detailed step-by-step]
+         styles[(seed * styles.size).to_i]
+       end
+
+       def select_demo_strategy(seed)
+         strategies = %w[random diverse similar]
+         strategies[(seed * strategies.size).to_i]
+       end
+
+       def default_config
+         super.merge({
+           max_iterations: 20,
+           num_candidates: 8,
+           convergence_threshold: 0.001,
+           clear_traces: true,
+           restore_trace_state: true,
+           acquisition_function: :expected_improvement,
+           max_bootstrapped_demos: 3
+         })
+       end
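+       # These defaults can be overridden per instance via keyword config, e.g.
+       # MIPROv2.new(metric: :exact_match, max_iterations: 50, num_candidates: 16)
+       # (assuming Base merges the passed config over default_config)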
+
+       # Simplified Gaussian Process implementation without matrix library
+       class GaussianProcess
+         def initialize(kernel = :rbf, length_scale = 1.0, noise = 0.1)
+           @kernel = kernel
+           @length_scale = length_scale
+           @noise = noise
+           @observations = []
+           @trained = false
+         end
+
+         def add_observation(features, value)
+           @observations << { features: features, value: value }
+           @trained = false
+         end
+
+         def update
+           # Simplified update - just mark as trained
+           @trained = !@observations.empty?
+         rescue StandardError => e
+           Desiru.logger&.warn("Gaussian Process update failed: #{e.message}")
+           @trained = false
+         end
+
+         def predict(features)
+           return { mean: 0.0, std: 1.0 } unless @trained && !@observations.empty?
+
+           # Simplified prediction using weighted average based on kernel similarity
+           weights = @observations.map do |obs|
+             kernel_function(features, obs[:features])
+           end
+
+           total_weight = weights.sum
+           return { mean: 0.0, std: 1.0 } if total_weight.zero?
+
+           # Normalize weights
+           weights = weights.map { |w| w / total_weight }
+
+           # Compute weighted mean
+           mean = @observations.zip(weights).map { |obs, w| obs[:value] * w }.sum
+
+           # Compute weighted variance for uncertainty
+           variance = @observations.zip(weights).map do |obs, w|
+             w * ((obs[:value] - mean)**2)
+           end.sum
+
+           std = Math.sqrt([variance + @noise, 0].max)
+
+           { mean: mean, std: std }
+         rescue StandardError => e
+           Desiru.logger&.warn("Gaussian Process prediction failed: #{e.message}")
+           { mean: 0.0, std: 1.0 }
+         end
+
+         private
+
+         def kernel_function(features1, features2)
+           # Only RBF kernel supported for now
+           rbf_kernel(features1, features2)
+         end
+
+         def rbf_kernel(features1, features2)
+           # Radial Basis Function kernel
+           distance = euclidean_distance(features1, features2)
+           Math.exp(-0.5 * ((distance / @length_scale)**2))
+         end
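+         # e.g. identical feature vectors have distance 0 and weight 1.0; with
+         # @length_scale = 1.0, a distance of 1.0 gives exp(-0.5) ≈ 0.607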
+
+         def euclidean_distance(features1, features2)
+           Math.sqrt(features1.zip(features2).map { |a, b| (a - b)**2 }.sum)
+         end
+       end
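+
+       # A rough sketch of how the simplified GP behaves (kernel-weighted
+       # smoothing rather than a full GP posterior):
+       #
+       #   gp = GaussianProcess.new
+       #   gp.add_observation([0.2, 0.5, 0.3, 0.66], 0.71)
+       #   gp.update
+       #   gp.predict([0.2, 0.5, 0.3, 0.66])
+       #   # => { mean: 0.71, std: 0.32 } (std is the sqrt of the 0.1 noise floor)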
+     end
+   end
+ end
+
+ # Register in the main module namespace for convenience
+ module Desiru
+   MIPROv2 = Optimizers::MIPROv2
+ end
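+ # e.g. Desiru::MIPROv2.new(metric: :f1) is equivalent to
+ # Desiru::Optimizers::MIPROv2.new(metric: :f1)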