dspy 0.28.2 → 0.29.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -4
  3. data/lib/dspy/code_act.rb +14 -1
  4. data/lib/dspy/datasets/ade.rb +90 -0
  5. data/lib/dspy/datasets.rb +8 -0
  6. data/lib/dspy/lm.rb +4 -8
  7. data/lib/dspy/mixins/struct_builder.rb +17 -25
  8. data/lib/dspy/module.rb +12 -1
  9. data/lib/dspy/observability/async_span_processor.rb +67 -93
  10. data/lib/dspy/observability.rb +43 -1
  11. data/lib/dspy/predict.rb +10 -0
  12. data/lib/dspy/propose/dataset_summary_generator.rb +36 -3
  13. data/lib/dspy/propose/grounded_proposer.rb +118 -11
  14. data/lib/dspy/re_act.rb +13 -0
  15. data/lib/dspy/reflection_lm.rb +36 -0
  16. data/lib/dspy/teleprompt/gepa.rb +448 -2803
  17. data/lib/dspy/teleprompt/mipro_v2.rb +839 -91
  18. data/lib/dspy/teleprompt/utils.rb +8 -3
  19. data/lib/dspy/version.rb +2 -2
  20. data/lib/dspy.rb +3 -2
  21. data/lib/gepa/api.rb +61 -0
  22. data/lib/gepa/core/engine.rb +226 -0
  23. data/lib/gepa/core/evaluation_batch.rb +26 -0
  24. data/lib/gepa/core/result.rb +92 -0
  25. data/lib/gepa/core/state.rb +231 -0
  26. data/lib/gepa/logging/experiment_tracker.rb +54 -0
  27. data/lib/gepa/logging/logger.rb +57 -0
  28. data/lib/gepa/logging.rb +9 -0
  29. data/lib/gepa/proposer/base.rb +27 -0
  30. data/lib/gepa/proposer/merge_proposer.rb +424 -0
  31. data/lib/gepa/proposer/reflective_mutation/base.rb +48 -0
  32. data/lib/gepa/proposer/reflective_mutation/reflective_mutation.rb +188 -0
  33. data/lib/gepa/strategies/batch_sampler.rb +91 -0
  34. data/lib/gepa/strategies/candidate_selector.rb +97 -0
  35. data/lib/gepa/strategies/component_selector.rb +57 -0
  36. data/lib/gepa/strategies/instruction_proposal.rb +120 -0
  37. data/lib/gepa/telemetry.rb +122 -0
  38. data/lib/gepa/utils/pareto.rb +119 -0
  39. data/lib/gepa.rb +21 -0
  40. metadata +38 -3
  41. data/lib/dspy/teleprompt/simple_optimizer.rb +0 -503
@@ -1,7 +1,12 @@
  # frozen_string_literal: true

  require 'digest'
+ require 'time'
+ require 'json'
+ require 'concurrent-ruby'
  require 'sorbet-runtime'
+ require 'securerandom'
+ require 'set'
  require_relative 'teleprompter'
  require_relative 'utils'
  require_relative '../propose/grounded_proposer'
@@ -27,6 +32,58 @@ module DSPy
  Bayesian = new("bayesian")
  end
  end
+
+ class AutoPreset < T::Enum
+ enums do
+ None = new("none")
+ Light = new("light")
+ Medium = new("medium")
+ Heavy = new("heavy")
+ end
+ end
+
+ AUTO_PRESET_SETTINGS = {
+ AutoPreset::None => {},
+ AutoPreset::Light => {
+ candidate_budget: 6,
+ instruction_candidates: 3,
+ instruction_candidates_when_fewshot: 3,
+ bootstrap_sets: 3,
+ max_bootstrapped_examples: 2,
+ max_labeled_examples: 8,
+ optimization_strategy: OptimizationStrategy::Greedy,
+ early_stopping_patience: 2,
+ valset_target_size: 100,
+ minibatch_size: nil
+ },
+ AutoPreset::Medium => {
+ candidate_budget: 12,
+ instruction_candidates: 5,
+ instruction_candidates_when_fewshot: 5,
+ bootstrap_sets: 5,
+ max_bootstrapped_examples: 4,
+ max_labeled_examples: 16,
+ optimization_strategy: OptimizationStrategy::Adaptive,
+ early_stopping_patience: 3,
+ valset_target_size: 300,
+ minibatch_size: nil
+ },
+ AutoPreset::Heavy => {
+ candidate_budget: 18,
+ instruction_candidates: 8,
+ instruction_candidates_when_fewshot: 8,
+ bootstrap_sets: 8,
+ max_bootstrapped_examples: 6,
+ max_labeled_examples: 24,
+ optimization_strategy: OptimizationStrategy::Bayesian,
+ early_stopping_patience: 5,
+ valset_target_size: 1000,
+ minibatch_size: nil
+ }
+ }.freeze
+
+ DEFAULT_AUTO_SEED = 42
+
  # MIPROv2: Multi-prompt Instruction Proposal with Retrieval Optimization
  # State-of-the-art prompt optimization combining bootstrap sampling,
  # instruction generation, and Bayesian optimization
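Not part of the diff itself, but for orientation: this is how one of the preset entries above is read, assuming the constants are in lexical scope (as they are inside apply_auto_defaults further down).

    # Sketch only: reading the Medium preset's knobs.
    settings = AUTO_PRESET_SETTINGS.fetch(AutoPreset::Medium) { {} }
    settings[:candidate_budget]      # => 12
    settings[:optimization_strategy] # => OptimizationStrategy::Adaptive
    settings[:valset_target_size]    # => 300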
@@ -47,13 +104,7 @@ module DSPy
  def self.light(metric: nil, **kwargs)
  optimizer = MIPROv2.new(metric: metric, **kwargs)
  optimizer.configure do |config|
- config.num_trials = 6
- config.num_instruction_candidates = 3
- config.max_bootstrapped_examples = 2
- config.max_labeled_examples = 8
- config.bootstrap_sets = 3
- config.optimization_strategy = :greedy
- config.early_stopping_patience = 2
+ MIPROv2.apply_auto_defaults(config, AutoPreset::Light)
  end
  optimizer
  end
@@ -67,13 +118,7 @@ module DSPy
  def self.medium(metric: nil, **kwargs)
  optimizer = MIPROv2.new(metric: metric, **kwargs)
  optimizer.configure do |config|
- config.num_trials = 12
- config.num_instruction_candidates = 5
- config.max_bootstrapped_examples = 4
- config.max_labeled_examples = 16
- config.bootstrap_sets = 5
- config.optimization_strategy = :adaptive
- config.early_stopping_patience = 3
+ MIPROv2.apply_auto_defaults(config, AutoPreset::Medium)
  end
  optimizer
  end
@@ -87,19 +132,33 @@ module DSPy
  def self.heavy(metric: nil, **kwargs)
  optimizer = MIPROv2.new(metric: metric, **kwargs)
  optimizer.configure do |config|
- config.num_trials = 18
- config.num_instruction_candidates = 8
- config.max_bootstrapped_examples = 6
- config.max_labeled_examples = 24
- config.bootstrap_sets = 8
- config.optimization_strategy = :bayesian
- config.early_stopping_patience = 5
+ MIPROv2.apply_auto_defaults(config, AutoPreset::Heavy)
  end
  optimizer
  end
  end

  # Dry-configurable settings for MIPROv2
+ setting :auto_preset, default: AutoPreset::None, constructor: ->(value) {
+ case value
+ when AutoPreset
+ value
+ when String, Symbol
+ begin
+ AutoPreset.deserialize(value.to_s.downcase)
+ rescue ArgumentError
+ raise ArgumentError, "Invalid auto preset: #{value}. Must be one of :none, :light, :medium, :heavy"
+ end
+ when nil
+ AutoPreset::None
+ else
+ raise ArgumentError, "Invalid auto preset: #{value.inspect}"
+ end
+ }
+ setting :auto_seed, default: DEFAULT_AUTO_SEED, constructor: ->(value) {
+ value.nil? ? DEFAULT_AUTO_SEED : Integer(value)
+ }
+ setting :valset_target_size, default: nil
  setting :num_trials, default: 12
  setting :num_instruction_candidates, default: 5
  setting :bootstrap_sets, default: 5
@@ -124,6 +183,7 @@ module DSPy
  setting :track_diversity, default: true
  setting :max_errors, default: 3
  setting :num_threads, default: 1
+ setting :minibatch_size, default: nil

  # Class-level configuration method - sets defaults for new instances
  def self.configure(&block)
@@ -138,6 +198,26 @@ module DSPy
  @default_config_block
  end

+ class << self
+ extend T::Sig
+
+ sig { params(config: T.untyped, preset: AutoPreset).void }
+ def apply_auto_defaults(config, preset)
+ settings = AUTO_PRESET_SETTINGS.fetch(preset) { {} }
+
+ config.auto_preset = preset
+ config.num_trials = settings[:candidate_budget] if settings[:candidate_budget]
+ config.num_instruction_candidates = settings[:instruction_candidates] if settings[:instruction_candidates]
+ config.bootstrap_sets = settings[:bootstrap_sets] if settings[:bootstrap_sets]
+ config.max_bootstrapped_examples = settings[:max_bootstrapped_examples] if settings.key?(:max_bootstrapped_examples)
+ config.max_labeled_examples = settings[:max_labeled_examples] if settings.key?(:max_labeled_examples)
+ config.optimization_strategy = settings[:optimization_strategy] if settings[:optimization_strategy]
+ config.early_stopping_patience = settings[:early_stopping_patience] if settings[:early_stopping_patience]
+ config.minibatch_size = settings[:minibatch_size] if settings.key?(:minibatch_size)
+ config.valset_target_size = settings[:valset_target_size] if settings[:valset_target_size]
+ end
+ end
+

  # Simple data structure for evaluated candidate configurations (immutable)
  EvaluatedCandidate = Data.define(
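The light/medium/heavy factories above now delegate to apply_auto_defaults, and the auto_preset setting coerces strings and symbols through AutoPreset.deserialize. A hedged usage sketch (the full constant path and the metric object are assumptions, not taken from the diff):

    # Sketch only: two equivalent ways to request the Light preset.
    optimizer = DSPy::Teleprompt::MIPROv2.light(metric: my_metric)

    optimizer = DSPy::Teleprompt::MIPROv2.new(metric: my_metric)
    optimizer.configure do |config|
      config.auto_preset = :light   # coerced via AutoPreset.deserialize("light")
    end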
@@ -265,6 +345,7 @@ module DSPy
  @proposer = DSPy::Propose::GroundedProposer.new(config: DSPy::Propose::GroundedProposer::Config.new)
  @optimization_trace = []
  @evaluated_candidates = []
+ @trial_history = {}
  end

  # Main MIPROv2 optimization method
@@ -282,13 +363,20 @@ module DSPy
  trainset_size: trainset.size,
  valset_size: valset&.size || 0,
  num_trials: config.num_trials,
- optimization_strategy: config.optimization_strategy,
+ optimization_strategy: optimization_strategy_name,
  mode: infer_auto_mode
  }) do
  # Convert examples to typed format
  typed_trainset = ensure_typed_examples(trainset)
  typed_valset = valset ? ensure_typed_examples(valset) : nil

+ if auto_preset_active?
+ typed_trainset, typed_valset = prepare_datasets_for_auto(typed_trainset, typed_valset)
+ typed_valset = apply_auto_preset!(program, typed_valset)
+ else
+ typed_valset = limit_validation_set(typed_valset, config.valset_target_size)
+ end
+
  # Use validation set if available, otherwise use part of training set
  evaluation_set = typed_valset || typed_trainset.take([typed_trainset.size / 3, 10].max)

@@ -331,6 +419,8 @@ module DSPy
  proposal_result
  )

+ @trial_history = optimization_result[:trial_logs] || {}
+
  save_results(final_result)
  final_result
  end
@@ -338,6 +428,105 @@ module DSPy

  private

+ sig { returns(T::Boolean) }
+ def auto_preset_active?
+ config.auto_preset != AutoPreset::None
+ end
+
+ sig { params(trainset: T::Array[DSPy::Example], valset: T.nilable(T::Array[DSPy::Example])).returns([T::Array[DSPy::Example], T::Array[DSPy::Example]]) }
+ def prepare_datasets_for_auto(trainset, valset)
+ settings = auto_settings_for(config.auto_preset)
+ target_size = settings[:valset_target_size]
+ config.valset_target_size = target_size
+
+ if valset && valset.any?
+ [trainset, limit_validation_set(valset, target_size)]
+ else
+ raise ArgumentError, "Training set must contain at least 2 examples when auto presets are enabled" if trainset.size < 2
+
+ shuffled = trainset.shuffle(random: Random.new(config.auto_seed))
+ default_val_size = [
+ [(trainset.size * 0.8).ceil, 1].max,
+ trainset.size - 1
+ ].min
+
+ desired_val_size = target_size ? [default_val_size, target_size].min : default_val_size
+ desired_val_size = [[desired_val_size, 1].max, trainset.size - 1].min
+
+ validation_examples = shuffled.take(desired_val_size)
+ training_examples = shuffled.drop(desired_val_size)
+
+ [training_examples, limit_validation_set(validation_examples, target_size)]
+ end
+ end
+
+ sig { params(program: T.untyped, valset: T::Array[DSPy::Example]).returns(T::Array[DSPy::Example]) }
+ def apply_auto_preset!(program, valset)
+ settings = auto_settings_for(config.auto_preset)
+ zeroshot = zero_shot_for_settings?(settings)
+ candidate_budget = settings[:candidate_budget]
+
+ if candidate_budget && candidate_budget.positive?
+ config.num_trials = compute_trials_from_candidate_budget(program, candidate_budget, zeroshot)
+ instruction_candidates = if zeroshot
+ candidate_budget
+ else
+ settings[:instruction_candidates_when_fewshot] || (candidate_budget / 2.0).ceil
+ end
+ config.num_instruction_candidates = [instruction_candidates, 1].max
+ end
+
+ config.bootstrap_sets = settings[:bootstrap_sets] if settings[:bootstrap_sets]
+ config.max_bootstrapped_examples = settings[:max_bootstrapped_examples] if settings.key?(:max_bootstrapped_examples)
+ config.max_labeled_examples = settings[:max_labeled_examples] if settings.key?(:max_labeled_examples)
+ config.optimization_strategy = settings[:optimization_strategy] if settings[:optimization_strategy]
+ config.early_stopping_patience = settings[:early_stopping_patience] if settings[:early_stopping_patience]
+ config.minibatch_size = settings[:minibatch_size] if settings.key?(:minibatch_size)
+
+ config.valset_target_size = settings[:valset_target_size]
+ limit_validation_set(valset, config.valset_target_size)
+ end
+
+ sig { params(valset: T.nilable(T::Array[DSPy::Example]), target_size: T.nilable(Integer)).returns(T.nilable(T::Array[DSPy::Example])) }
+ def limit_validation_set(valset, target_size)
+ return valset unless valset && target_size && target_size.positive?
+ return valset if valset.size <= target_size
+
+ valset.shuffle(random: Random.new(config.auto_seed)).take(target_size)
+ end
+
+ sig { params(program: T.untyped, num_candidates: Integer, zeroshot: T::Boolean).returns(Integer) }
+ def compute_trials_from_candidate_budget(program, num_candidates, zeroshot)
+ predictor_count =
+ if program.respond_to?(:predictors)
+ Array(program.predictors).size
+ else
+ 1
+ end
+
+ predictor_count = 1 if predictor_count.zero?
+ variable_count = zeroshot ? predictor_count : predictor_count * 2
+ log_term = Math.log2([num_candidates, 2].max)
+
+ [
+ (2 * variable_count * log_term).ceil,
+ (1.5 * num_candidates).ceil
+ ].max
+ end
+
+ sig { params(settings: T::Hash[Symbol, T.untyped]).returns(T::Boolean) }
+ def zero_shot_for_settings?(settings)
+ settings.fetch(:max_bootstrapped_examples, 0).to_i.zero? &&
+ settings.fetch(:max_labeled_examples, 0).to_i.zero?
+ end
+
+ sig { params(preset: AutoPreset).returns(T::Hash[Symbol, T.untyped]) }
+ def auto_settings_for(preset)
+ AUTO_PRESET_SETTINGS.fetch(preset) do
+ raise ArgumentError, "Unknown auto preset: #{preset.inspect}"
+ end
+ end
+
  # Phase 1: Bootstrap few-shot examples from training data
  # Returns a hash mapping predictor indices to arrays of demo sets
  sig { params(program: T.untyped, trainset: T::Array[DSPy::Example]).returns(T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]]) }
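To make the trial-budget arithmetic in compute_trials_from_candidate_budget concrete: for the Medium preset (candidate_budget 12) on a single-predictor program with demos enabled, variable_count is 2, so the result is max(ceil(2 * 2 * log2(12)), ceil(1.5 * 12)) = max(15, 18) = 18 trials. A small sketch of the same calculation:

    # Sketch only: the formula above with the Medium preset's numbers.
    num_candidates = 12
    variable_count = 2  # one predictor, instructions and demos both searched
    log_term = Math.log2([num_candidates, 2].max)
    [(2 * variable_count * log_term).ceil, (1.5 * num_candidates).ceil].max  # => 18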
@@ -368,10 +557,6 @@ module DSPy
  # Flatten demo sets from first predictor and take first 5 examples
  few_shot_examples = demo_candidates[0]&.flatten&.take(5) || []

- # Get signature class from program
- signature_class = extract_signature_class(program)
- raise ArgumentError, "Cannot extract signature class from program" unless signature_class
-
  # Re-initialize proposer with program and trainset for awareness features
  # This enables program_aware and use_dataset_summary flags to work correctly
  proposer_config = DSPy::Propose::GroundedProposer::Config.new
@@ -383,11 +568,12 @@ module DSPy
  trainset: trainset
  )

- @proposer.propose_instructions(
- signature_class,
- trainset,
- few_shot_examples: few_shot_examples,
- current_instruction: current_instruction
+ @proposer.propose_instructions_for_program(
+ trainset: trainset,
+ program: program,
+ demo_candidates: demo_candidates,
+ trial_logs: @trial_history,
+ num_instruction_candidates: config.num_instruction_candidates
  )
  end

@@ -406,12 +592,18 @@ module DSPy

  # Initialize optimization state
  optimization_state = initialize_optimization_state(candidates)
-
+
+ # Initialize trial tracking structures
+ trial_logs = {}
+ param_score_dict = Hash.new { |hash, key| hash[key] = [] }
+ fully_evaled_param_combos = {}
+ total_eval_calls = 0
+
  # Run optimization trials
  trials_completed = 0
  best_score = 0.0
  best_candidate = nil
- best_program = nil
+ best_program = program
  best_evaluation_result = nil

  config.num_trials.times do |trial_idx|
@@ -419,6 +611,14 @@ module DSPy

  # Select next candidate based on optimization strategy
  candidate = select_next_candidate(candidates, optimization_state, trial_idx)
+ batch_size = evaluation_set.size
+
+ trial_logs[trials_completed] = create_trial_log_entry(
+ trial_number: trials_completed,
+ candidate: candidate,
+ evaluation_type: :full,
+ batch_size: batch_size
+ )

  emit_event('trial_start', {
  trial_number: trials_completed,
@@ -430,12 +630,30 @@ module DSPy
  begin
  # Evaluate candidate
  score, modified_program, evaluation_result = evaluate_candidate(program, candidate, evaluation_set)
+ total_eval_calls += batch_size
+
+ instructions_snapshot = extract_program_instructions(modified_program)
+ trial_logs[trials_completed][:instructions] = instructions_snapshot unless instructions_snapshot.empty?
+ trial_logs[trials_completed][:instruction] = instructions_snapshot[0] if instructions_snapshot.key?(0)

  # Update optimization state
  update_optimization_state(optimization_state, candidate, score)
+ record_param_score(
+ param_score_dict,
+ candidate,
+ score,
+ evaluation_type: :full,
+ instructions: instructions_snapshot
+ )
+ update_fully_evaled_param_combos(
+ fully_evaled_param_combos,
+ candidate,
+ score,
+ instructions: instructions_snapshot
+ )

  # Track best result
- is_best = score > best_score
+ is_best = best_candidate.nil? || score > best_score
  if is_best
  best_score = score
  best_candidate = candidate
@@ -443,6 +661,15 @@ module DSPy
  best_evaluation_result = evaluation_result
  end

+ finalize_trial_log_entry(
+ trial_logs,
+ trials_completed,
+ score: score,
+ evaluation_type: :full,
+ batch_size: batch_size,
+ total_eval_calls: total_eval_calls
+ )
+
  emit_event('trial_complete', {
  trial_number: trials_completed,
  score: score,
@@ -457,6 +684,16 @@ module DSPy
  end

  rescue => error
+ finalize_trial_log_entry(
+ trial_logs,
+ trials_completed,
+ score: nil,
+ evaluation_type: :full,
+ batch_size: batch_size,
+ total_eval_calls: total_eval_calls,
+ error: error.message
+ )
+
  emit_event('trial_error', {
  trial_number: trials_completed,
  error: error.message,
@@ -474,7 +711,11 @@ module DSPy
  best_evaluation_result: best_evaluation_result,
  trials_completed: trials_completed,
  optimization_state: optimization_state,
- evaluated_candidates: @evaluated_candidates
+ evaluated_candidates: @evaluated_candidates,
+ trial_logs: trial_logs,
+ param_score_dict: param_score_dict,
+ fully_evaled_param_combos: fully_evaled_param_combos,
+ total_eval_calls: total_eval_calls
  }
  end

@@ -487,62 +728,237 @@ module DSPy
  end
  def generate_candidate_configurations(proposal_result, demo_candidates)
  candidates = []
+ seen_signatures = Set.new
+
+ add_candidate = lambda do |instruction:, few_shot_examples:, type:, metadata:, config_id:|
+ signature = candidate_signature(type, instruction, metadata, few_shot_examples)
+ next if seen_signatures.include?(signature)
+
+ seen_signatures << signature
+ candidates << EvaluatedCandidate.new(
+ instruction: instruction,
+ few_shot_examples: few_shot_examples,
+ type: type,
+ metadata: metadata,
+ config_id: config_id
+ )
+ end
+
+ predictor_instruction_map = if proposal_result.respond_to?(:predictor_instructions) && proposal_result.predictor_instructions.any?
+ proposal_result.predictor_instructions
+ else
+ { 0 => proposal_result.candidate_instructions }
+ end
+
+ instruction_maps = build_instruction_maps(predictor_instruction_map)
+ demo_maps = build_demo_maps(demo_candidates)

  # Base configuration (no modifications)
- candidates << EvaluatedCandidate.new(
+ add_candidate.call(
  instruction: "",
  few_shot_examples: [],
  type: CandidateType::Baseline,
- metadata: {},
+ metadata: {
+ instructions_map: {},
+ demos_map: {}
+ },
  config_id: SecureRandom.hex(6)
  )

- # Instruction-only candidates
- proposal_result.candidate_instructions.each_with_index do |instruction, idx|
- candidates << EvaluatedCandidate.new(
- instruction: instruction,
+ instruction_maps.each_with_index do |instruction_map, combo_idx|
+ primary_instruction = instruction_map[0] || instruction_map.values.first || ""
+ add_candidate.call(
+ instruction: primary_instruction,
  few_shot_examples: [],
  type: CandidateType::InstructionOnly,
- metadata: { proposal_rank: idx },
+ metadata: {
+ proposal_rank: combo_idx,
+ instructions_map: duplicate_instruction_map(instruction_map),
+ demos_map: {}
+ },
  config_id: SecureRandom.hex(6)
  )
  end

- # Few-shot only candidates
- # Extract demo sets from first predictor (predictor index 0)
- demo_sets = demo_candidates[0] || []
- demo_sets.each_with_index do |demo_set, idx|
- candidates << EvaluatedCandidate.new(
+ demo_maps.each_with_index do |demo_map, idx|
+ next if demo_map.empty?
+
+ flattened_examples = demo_map.values.flatten
+ add_candidate.call(
  instruction: "",
- few_shot_examples: demo_set,
+ few_shot_examples: flattened_examples,
  type: CandidateType::FewShotOnly,
- metadata: { bootstrap_rank: idx },
+ metadata: {
+ bootstrap_rank: idx,
+ instructions_map: {},
+ demos_map: duplicate_demo_map(demo_map)
+ },
  config_id: SecureRandom.hex(6)
  )
  end

  # Combined candidates (instruction + few-shot)
- top_instructions = proposal_result.candidate_instructions.take(3)
- top_bootstrap_sets = demo_sets.take(3)
-
- top_instructions.each_with_index do |instruction, i_idx|
- top_bootstrap_sets.each_with_index do |candidate_set, b_idx|
- candidates << EvaluatedCandidate.new(
- instruction: instruction,
- few_shot_examples: candidate_set,
+ instruction_maps.each_with_index do |instruction_map, combo_idx|
+ primary_instruction = instruction_map[0] || instruction_map.values.first || ""
+ demo_maps.first(3).each_with_index do |demo_map, demo_idx|
+ next if demo_map.empty?
+
+ flattened_examples = demo_map.values.flatten
+ add_candidate.call(
+ instruction: primary_instruction,
+ few_shot_examples: flattened_examples,
  type: CandidateType::Combined,
- metadata: {
- instruction_rank: i_idx,
- bootstrap_rank: b_idx
+ metadata: {
+ instruction_rank: combo_idx,
+ bootstrap_rank: demo_idx,
+ instructions_map: duplicate_instruction_map(instruction_map),
+ demos_map: duplicate_demo_map(demo_map)
  },
  config_id: SecureRandom.hex(6)
  )
  end
  end
-
+
  candidates
  end

+ sig { params(predictor_instruction_map: T::Hash[Integer, T::Array[String]]).returns(T::Array[T::Hash[Integer, String]]) }
+ def build_instruction_maps(predictor_instruction_map)
+ return [{}] if predictor_instruction_map.nil? || predictor_instruction_map.empty?
+
+ normalized = predictor_instruction_map.each_with_object({}) do |(index, instructions), memo|
+ next if instructions.nil? || instructions.empty?
+ memo[index] = instructions.take(3)
+ end
+
+ return [{}] if normalized.empty?
+
+ cartesian_product(normalized)
+ end
+
+ sig do
+ params(demo_candidates: T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]]).returns(T::Array[T::Hash[Integer, T::Array[DSPy::FewShotExample]]])
+ end
+ def build_demo_maps(demo_candidates)
+ return [{}] if demo_candidates.nil? || demo_candidates.empty?
+
+ normalized = demo_candidates.each_with_object({}) do |(index, sets), memo|
+ next if sets.nil? || sets.empty?
+ memo[index] = sets.take(3)
+ end
+
+ return [{}] if normalized.empty?
+
+ cartesian_product(normalized)
+ end
+
+ sig do
+ params(options_hash: T::Hash[Integer, T::Array[T.untyped]]).returns(T::Array[T::Hash[Integer, T.untyped]])
+ end
+ def cartesian_product(options_hash)
+ options_hash.sort_by { |index, _| index }.reduce([{}]) do |acc, (index, values)|
+ next acc if values.nil? || values.empty?
+
+ acc.flat_map do |existing|
+ values.map do |value|
+ existing.merge(index => value)
+ end
+ end
+ end
+ end
+
+ sig { params(instruction_map: T::Hash[Integer, String]).returns(T::Hash[Integer, String]) }
+ def duplicate_instruction_map(instruction_map)
+ instruction_map.each_with_object({}) do |(index, instruction), memo|
+ memo[index] = instruction.is_a?(String) ? instruction.dup : instruction
+ end
+ end
+
+ sig do
+ params(demo_map: T::Hash[Integer, T::Array[DSPy::FewShotExample]]).returns(T::Hash[Integer, T::Array[DSPy::FewShotExample]])
+ end
+ def duplicate_demo_map(demo_map)
+ demo_map.each_with_object({}) do |(index, demos), memo|
+ next if demos.nil?
+ memo[index] = demos.map { |demo| demo }
+ end
+ end
+
+ sig do
+ params(
+ type: CandidateType,
+ instruction: String,
+ metadata: T::Hash[Symbol, T.untyped],
+ few_shot_examples: T::Array[T.untyped]
+ ).returns(String)
+ end
+ def candidate_signature(type, instruction, metadata, few_shot_examples)
+ JSON.generate(
+ type: type.serialize,
+ instruction: instruction,
+ instructions_map: normalize_instruction_map(metadata[:instructions_map] || {}),
+ demos_map: normalize_demo_map(metadata[:demos_map] || {}),
+ few_shot_examples: few_shot_examples.map { |example| serialize_few_shot_example(example) }
+ )
+ end
+
+ sig { params(map: T::Hash[Integer, T.untyped]).returns(T::Hash[Integer, String]) }
+ def normalize_instruction_map(map)
+ map.sort_by { |index, _| index }.each_with_object({}) do |(index, value), memo|
+ memo[index] = value.to_s
+ end
+ end
+
+ sig { params(map: T::Hash[Integer, T::Array[T.untyped]]).returns(T::Hash[Integer, T::Array[T.untyped]]) }
+ def normalize_demo_map(map)
+ map.sort_by { |index, _| index }.each_with_object({}) do |(index, demos), memo|
+ memo[index] = Array(demos).map { |demo| serialize_few_shot_example(demo) }
+ end
+ end
+
+ sig { params(example: T.untyped).returns(T.untyped) }
+ def serialize_few_shot_example(example)
+ case example
+ when DSPy::FewShotExample
+ deep_dup(example.to_h)
+ when DSPy::Example
+ {
+ input: deep_dup(example.input_values),
+ expected: deep_dup(example.expected_values)
+ }
+ when Hash
+ deep_dup(example)
+ else
+ example
+ end
+ end
+
+ sig { params(examples: T::Array[T.untyped]).returns(T::Array[DSPy::FewShotExample]) }
+ def normalize_few_shot_examples(examples)
+ examples.map do |example|
+ if example.is_a?(DSPy::FewShotExample)
+ example
+ elsif example.is_a?(DSPy::Example)
+ DSPy::FewShotExample.new(
+ input: example.input_values,
+ output: example.expected_values,
+ reasoning: extract_reasoning_from_example(example)
+ )
+ else
+ example
+ end
+ end
+ end
+
+ sig { params(predictor: T.untyped, examples: T::Array[DSPy::FewShotExample]).void }
+ def assign_predictor_examples(predictor, examples)
+ predictor.demos = examples if predictor.respond_to?(:demos=)
+ return unless predictor.respond_to?(:prompt)
+
+ cloned_examples = examples.map { |ex| ex }
+ predictor.prompt.instance_variable_set(:@few_shot_examples, cloned_examples.freeze)
+ end
+
  # Initialize optimization state
  sig { params(candidates: T::Array[EvaluatedCandidate]).returns(T::Hash[Symbol, T.untyped]) }
  def initialize_optimization_state(candidates)
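Candidate generation above crosses per-predictor instruction maps with per-predictor demo maps via cartesian_product; a minimal illustration of that helper's behaviour:

    # Sketch only: cartesian_product over per-predictor options.
    cartesian_product({ 0 => ["A", "B"], 1 => ["X"] })
    # => [{ 0 => "A", 1 => "X" }, { 0 => "B", 1 => "X" }]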
@@ -722,7 +1138,11 @@ module DSPy
  modified_program = apply_candidate_configuration(program, candidate)

  # Evaluate modified program
- evaluation_result = evaluate_program(modified_program, evaluation_set)
+ evaluation_result = if use_concurrent_evaluation?(evaluation_set)
+ evaluate_candidate_concurrently(modified_program, evaluation_set)
+ else
+ evaluate_program(modified_program, evaluation_set)
+ end

  # Store evaluation details
  @evaluated_candidates << candidate
@@ -730,32 +1150,131 @@ module DSPy
  [evaluation_result.pass_rate, modified_program, evaluation_result]
  end

+ sig { params(evaluation_set: T::Array[DSPy::Example]).returns(T::Boolean) }
+ def use_concurrent_evaluation?(evaluation_set)
+ minibatch_size = config.minibatch_size
+ return false unless minibatch_size&.positive?
+ return false unless config.num_threads && config.num_threads > 1
+
+ evaluation_set.size > minibatch_size
+ end
+
+ sig do
+ params(
+ modified_program: T.untyped,
+ evaluation_set: T::Array[DSPy::Example]
+ ).returns(DSPy::Evaluate::BatchEvaluationResult)
+ end
+ def evaluate_candidate_concurrently(modified_program, evaluation_set)
+ chunk_size = T.must(config.minibatch_size)
+ chunks = evaluation_set.each_slice(chunk_size).map(&:dup)
+ return evaluate_program(modified_program, evaluation_set) if chunks.size <= 1
+
+ pool_size = [config.num_threads, chunks.size].min
+ pool_size = 1 if pool_size <= 0
+ executor = Concurrent::FixedThreadPool.new(pool_size)
+
+ futures = chunks.map do |chunk|
+ Concurrent::Promises.future_on(executor) do
+ evaluate_program(modified_program, chunk)
+ end
+ end
+
+ results = futures.map(&:value!)
+ combine_batch_results(results)
+ ensure
+ if executor
+ executor.shutdown
+ executor.wait_for_termination
+ end
+ end
+
+ sig do
+ params(batch_results: T::Array[DSPy::Evaluate::BatchEvaluationResult]).returns(DSPy::Evaluate::BatchEvaluationResult)
+ end
+ def combine_batch_results(batch_results)
+ return DSPy::Evaluate::BatchEvaluationResult.new(results: [], aggregated_metrics: {}) if batch_results.empty?
+
+ combined_results = batch_results.flat_map(&:results)
+ total_examples = batch_results.sum(&:total_examples)
+ aggregated_metrics = merge_aggregated_metrics(batch_results, total_examples)
+
+ DSPy::Evaluate::BatchEvaluationResult.new(
+ results: combined_results,
+ aggregated_metrics: aggregated_metrics
+ )
+ end
+
+ sig do
+ params(
+ batch_results: T::Array[DSPy::Evaluate::BatchEvaluationResult],
+ total_examples: Integer
+ ).returns(T::Hash[Symbol, T.untyped])
+ end
+ def merge_aggregated_metrics(batch_results, total_examples)
+ return {} if total_examples.zero?
+
+ keys = batch_results.flat_map { |res| res.aggregated_metrics.keys }.uniq
+ keys.each_with_object({}) do |key, memo|
+ numeric_weight = 0.0
+ numeric_sum = 0.0
+ fallback_value = nil
+
+ batch_results.each do |res|
+ value = res.aggregated_metrics[key]
+ next if value.nil?
+
+ if value.is_a?(Numeric)
+ numeric_sum += value.to_f * res.total_examples
+ numeric_weight += res.total_examples
+ else
+ fallback_value = value
+ end
+ end
+
+ if numeric_weight.positive?
+ memo[key] = numeric_sum / numeric_weight
+ elsif fallback_value
+ memo[key] = fallback_value
+ end
+ end
+ end
+
  # Apply candidate configuration to program
  sig { params(program: T.untyped, candidate: EvaluatedCandidate).returns(T.untyped) }
  def apply_candidate_configuration(program, candidate)
+ instructions_map = candidate.metadata[:instructions_map] || {}
+ demos_map = candidate.metadata[:demos_map] || {}
+
  modified_program = program
-
- # Apply instruction if provided
- if !candidate.instruction.empty? && program.respond_to?(:with_instruction)
- modified_program = modified_program.with_instruction(candidate.instruction)
- end
-
- # Apply few-shot examples if provided
- if candidate.few_shot_examples.any? && program.respond_to?(:with_examples)
- few_shot_examples = candidate.few_shot_examples.map do |example|
- # If already a FewShotExample, use it directly
- if example.is_a?(DSPy::FewShotExample)
- example
- else
- # Convert from DSPy::Example
- DSPy::FewShotExample.new(
- input: example.input_values,
- output: example.expected_values,
- reasoning: extract_reasoning_from_example(example)
- )
+ if modified_program.respond_to?(:predictors) && (instructions_map.any? || demos_map.any?)
+ modified_program = modified_program.clone
+ modified_program.predictors.each_with_index do |predictor, idx|
+ if instructions_map.key?(idx)
+ signature = Utils.get_signature(predictor)
+ updated_signature = signature.with_instructions(instructions_map[idx])
+ Utils.set_signature(predictor, updated_signature)
+ end
+
+ if demos_map.key?(idx)
+ normalized_examples = normalize_few_shot_examples(demos_map[idx])
+ assign_predictor_examples(predictor, normalized_examples)
  end
  end
- modified_program = modified_program.with_examples(few_shot_examples)
+ end
+
+ # Apply instruction if provided (top-level programs still respect with_instruction)
+ if !candidate.instruction.empty? && modified_program.respond_to?(:with_instruction)
+ modified_program = modified_program.with_instruction(candidate.instruction)
+ end
+
+ should_apply_global_examples = candidate.few_shot_examples.any? &&
+ modified_program.respond_to?(:with_examples) &&
+ (demos_map.empty? || !modified_program.respond_to?(:predictors))
+
+ if should_apply_global_examples
+ normalized_few_shot = normalize_few_shot_examples(candidate.few_shot_examples)
+ modified_program = modified_program.with_examples(normalized_few_shot)
  end

  modified_program
@@ -824,14 +1343,16 @@ module DSPy

  history = {
  total_trials: optimization_result[:trials_completed],
- optimization_strategy: config.optimization_strategy,
+ optimization_strategy: optimization_strategy_name,
  early_stopped: optimization_result[:trials_completed] < config.num_trials,
- score_history: optimization_result[:optimization_state][:best_score_history]
+ score_history: optimization_result[:optimization_state][:best_score_history],
+ total_eval_calls: optimization_result[:total_eval_calls]
  }

  metadata = {
  optimizer: "MIPROv2",
  auto_mode: infer_auto_mode,
+ optimization_strategy: optimization_strategy_name,
  best_instruction: best_candidate&.instruction || "",
  best_few_shot_count: best_candidate&.few_shot_examples&.size || 0,
  best_candidate_type: best_candidate&.type&.serialize || "unknown",
@@ -839,12 +1360,21 @@ module DSPy
  }

  # Create bootstrap statistics from demo_candidates
- demo_sets = demo_candidates[0] || []
+ num_predictors = demo_candidates.keys.size
+ sets_per_predictor = demo_candidates.values.map(&:size)
+ all_demo_sets = demo_candidates.values.flat_map { |sets| sets }
  bootstrap_statistics = {
- num_predictors: demo_candidates.keys.size,
- demo_sets_per_predictor: demo_sets.size,
- avg_demos_per_set: demo_sets.empty? ? 0 : demo_sets.map(&:size).sum.to_f / demo_sets.size
+ num_predictors: num_predictors,
+ demo_sets_per_predictor: sets_per_predictor.max || 0,
+ avg_demos_per_set: all_demo_sets.empty? ? 0 : all_demo_sets.map(&:size).sum.to_f / all_demo_sets.size
  }
+ bootstrap_statistics[:per_predictor_demo_counts] = sets_per_predictor if sets_per_predictor.any?
+
+ optimization_trace = serialize_optimization_trace(optimization_result[:optimization_state])
+ optimization_trace[:trial_logs] = serialize_trial_logs(optimization_result[:trial_logs])
+ optimization_trace[:param_score_dict] = serialize_param_score_dict(optimization_result[:param_score_dict])
+ optimization_trace[:fully_evaled_param_combos] = serialize_fully_evaled_param_combos(optimization_result[:fully_evaled_param_combos])
+ optimization_trace[:total_eval_calls] = optimization_result[:total_eval_calls]

  MIPROv2Result.new(
  optimized_program: best_program,
@@ -854,7 +1384,7 @@ module DSPy
  best_score_value: best_score,
  metadata: metadata,
  evaluated_candidates: @evaluated_candidates,
- optimization_trace: serialize_optimization_trace(optimization_result[:optimization_state]),
+ optimization_trace: optimization_trace,
  bootstrap_statistics: bootstrap_statistics,
  proposal_statistics: proposal_result.analysis,
  best_evaluation_result: best_evaluation_result
@@ -876,7 +1406,205 @@ module DSPy
  serialized_trace
  end

+ sig do
+ params(
+ trial_number: Integer,
+ candidate: EvaluatedCandidate,
+ evaluation_type: Symbol,
+ batch_size: Integer
+ ).returns(T::Hash[Symbol, T.untyped])
+ end
+ def create_trial_log_entry(trial_number:, candidate:, evaluation_type:, batch_size:)
+ # Preserve interface parity with Python implementation (trial number stored implicitly via hash key)
+ trial_number # no-op to acknowledge parameter usage
+ instructions_map = candidate.metadata[:instructions_map] || {}
+ demos_map = candidate.metadata[:demos_map] || {}
+ entry = {
+ candidate_id: candidate.config_id,
+ candidate_type: candidate.type.serialize,
+ instruction_preview: candidate.instruction.to_s[0, 160],
+ few_shot_count: candidate.few_shot_examples.size,
+ metadata: deep_dup(candidate.metadata),
+ evaluation_type: evaluation_type,
+ batch_size: batch_size,
+ status: :in_progress,
+ started_at: Time.now.iso8601
+ }
+ if instructions_map.any?
+ entry[:instructions] = duplicate_instruction_map(instructions_map)
+ entry[:instruction] = entry[:instructions][0] if entry[:instructions].key?(0)
+ elsif candidate.instruction && !candidate.instruction.empty?
+ predictor_index = candidate.metadata[:predictor_index] || 0
+ entry[:instruction] = candidate.instruction
+ entry[:instructions] = { predictor_index => candidate.instruction }
+ end
+ entry[:few_shot_map] = duplicate_demo_map(demos_map) if demos_map.any?
+ entry
+ end
+
+ sig do
+ params(
+ trial_logs: T::Hash[Integer, T::Hash[Symbol, T.untyped]],
+ trial_number: Integer,
+ score: T.nilable(Float),
+ evaluation_type: Symbol,
+ batch_size: Integer,
+ total_eval_calls: Integer,
+ error: T.nilable(String)
+ ).void
+ end
+ def finalize_trial_log_entry(trial_logs, trial_number, score:, evaluation_type:, batch_size:, total_eval_calls:, error: nil)
+ entry = trial_logs[trial_number] || {}
+ entry[:score] = score if score
+ entry[:evaluation_type] = evaluation_type
+ entry[:batch_size] = batch_size
+ entry[:total_eval_calls] = total_eval_calls
+ entry[:status] = error ? :error : :completed
+ entry[:error] = error if error
+ entry[:completed_at] = Time.now.iso8601
+ trial_logs[trial_number] = entry
+ end
+
+ sig do
+ params(
+ param_score_dict: T::Hash[String, T::Array[T::Hash[Symbol, T.untyped]]],
+ candidate: EvaluatedCandidate,
+ score: Float,
+ evaluation_type: Symbol,
+ instructions: T.nilable(T::Hash[Integer, String])
+ ).void
+ end
+ def record_param_score(param_score_dict, candidate, score, evaluation_type:, instructions: nil)
+ instructions_hash = instructions || {}
+ if instructions_hash.empty? && candidate.instruction && !candidate.instruction.empty?
+ predictor_index = candidate.metadata[:predictor_index] || 0
+ instructions_hash[predictor_index] = candidate.instruction
+ end
+
+ record = {
+ candidate_id: candidate.config_id,
+ candidate_type: candidate.type.serialize,
+ score: score,
+ evaluation_type: evaluation_type,
+ timestamp: Time.now.iso8601,
+ metadata: deep_dup(candidate.metadata)
+ }
+ primary_instruction = instructions_hash[0] || candidate.instruction
+ record[:instruction] = primary_instruction if primary_instruction && !primary_instruction.empty?
+ record[:instructions] = instructions_hash unless instructions_hash.empty?
+
+ param_score_dict[candidate.config_id] << record
+ end
+
+ sig do
+ params(
+ fully_evaled_param_combos: T::Hash[String, T::Hash[Symbol, T.untyped]],
+ candidate: EvaluatedCandidate,
+ score: Float,
+ instructions: T.nilable(T::Hash[Integer, String])
+ ).void
+ end
+ def update_fully_evaled_param_combos(fully_evaled_param_combos, candidate, score, instructions: nil)
+ existing = fully_evaled_param_combos[candidate.config_id]
+ if existing.nil? || score > existing[:score]
+ instructions_hash = instructions || {}
+ if instructions_hash.empty? && candidate.instruction && !candidate.instruction.empty?
+ predictor_index = candidate.metadata[:predictor_index] || 0
+ instructions_hash[predictor_index] = candidate.instruction
+ end
+
+ fully_evaled_param_combos[candidate.config_id] = {
+ candidate_id: candidate.config_id,
+ candidate_type: candidate.type.serialize,
+ score: score,
+ metadata: deep_dup(candidate.metadata),
+ updated_at: Time.now.iso8601
+ }
+ unless instructions_hash.empty?
+ fully_evaled_param_combos[candidate.config_id][:instructions] = instructions_hash
+ fully_evaled_param_combos[candidate.config_id][:instruction] = instructions_hash[0] || candidate.instruction
+ end
+ end
+ end
+
+ sig { params(trial_logs: T.nilable(T::Hash[Integer, T::Hash[Symbol, T.untyped]])).returns(T::Hash[Integer, T::Hash[Symbol, T.untyped]]) }
+ def serialize_trial_logs(trial_logs)
+ return {} unless trial_logs
+
+ allowed_keys = [
+ :candidate_id,
+ :candidate_type,
+ :instruction_preview,
+ :instruction,
+ :instructions,
+ :few_shot_count,
+ :metadata,
+ :evaluation_type,
+ :batch_size,
+ :score,
+ :status,
+ :error,
+ :started_at,
+ :completed_at,
+ :total_eval_calls
+ ]
+
+ trial_logs.transform_values do |entry|
+ entry.each_with_object({}) do |(key, value), memo|
+ memo[key] = value if allowed_keys.include?(key)
+ end
+ end
+ end
+
+ sig { params(param_score_dict: T.nilable(T::Hash[String, T::Array[T::Hash[Symbol, T.untyped]]])).returns(T::Hash[String, T::Array[T::Hash[Symbol, T.untyped]]]) }
+ def serialize_param_score_dict(param_score_dict)
+ return {} unless param_score_dict
+
+ allowed_keys = [:candidate_id, :candidate_type, :score, :evaluation_type, :timestamp, :metadata, :instruction, :instructions]
+
+ param_score_dict.transform_values do |records|
+ records.map do |record|
+ record.each_with_object({}) do |(key, value), memo|
+ memo[key] = value if allowed_keys.include?(key)
+ end
+ end
+ end
+ end
+
+ sig { params(fully_evaled_param_combos: T.nilable(T::Hash[String, T::Hash[Symbol, T.untyped]])).returns(T::Hash[String, T::Hash[Symbol, T.untyped]]) }
+ def serialize_fully_evaled_param_combos(fully_evaled_param_combos)
+ return {} unless fully_evaled_param_combos
+
+ allowed_keys = [:candidate_id, :candidate_type, :score, :metadata, :updated_at, :instruction, :instructions]
+
+ fully_evaled_param_combos.transform_values do |record|
+ record.each_with_object({}) do |(key, value), memo|
+ memo[key] = value if allowed_keys.include?(key)
+ end
+ end
+ end
+
+ sig { params(value: T.untyped).returns(T.untyped) }
+ def deep_dup(value)
+ case value
+ when Hash
+ value.each_with_object({}) { |(k, v), memo| memo[k] = deep_dup(v) }
+ when Array
+ value.map { |element| deep_dup(element) }
+ else
+ value
+ end
+ end
+
  # Helper methods
+ sig { returns(String) }
+ def optimization_strategy_name
+ strategy = config.optimization_strategy
+ return strategy.serialize if strategy.respond_to?(:serialize)
+
+ strategy.to_s
+ end
+
  sig { params(program: T.untyped).returns(T.nilable(String)) }
  def extract_current_instruction(program)
  if program.respond_to?(:prompt) && program.prompt.respond_to?(:instruction)
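After a trial completes, serialize_trial_logs keeps only the whitelisted keys from each entry, so a persisted record has roughly this shape (the field values below are illustrative placeholders, not taken from the diff):

    # Illustrative shape only.
    {
      candidate_id: "a1b2c3d4e5f6",
      candidate_type: "baseline",
      instruction_preview: "",
      few_shot_count: 0,
      evaluation_type: :full,
      batch_size: 100,
      score: 0.72,
      status: :completed,
      started_at: "2025-01-01T00:00:00Z",
      completed_at: "2025-01-01T00:02:10Z",
      total_eval_calls: 300
    }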
@@ -889,6 +1617,23 @@ module DSPy
  end
  end

+ sig { params(program: T.untyped).returns(T::Hash[Integer, String]) }
+ def extract_program_instructions(program)
+ instructions = {}
+ if program.respond_to?(:predictors)
+ program.predictors.each_with_index do |predictor, index|
+ if predictor.respond_to?(:prompt) && predictor.prompt.respond_to?(:instruction)
+ value = predictor.prompt.instruction
+ instructions[index] = value if value
+ end
+ end
+ else
+ fallback_instruction = extract_current_instruction(program)
+ instructions[0] = fallback_instruction if fallback_instruction
+ end
+ instructions
+ end
+
  sig { params(program: T.untyped).returns(T.nilable(T.class_of(DSPy::Signature))) }
  def extract_signature_class(program)
  program.respond_to?(:signature_class) ? program.signature_class : nil
@@ -913,12 +1658,15 @@ module DSPy
  # Infer auto mode based on configuration
  sig { returns(String) }
  def infer_auto_mode
+ return config.auto_preset.serialize unless config.auto_preset == AutoPreset::None
+
  case config.num_trials
  when 0..6 then "light"
  when 7..12 then "medium"
- else "heavy"
+ when 13..Float::INFINITY then "heavy"
+ else "manual"
  end
  end
  end
  end
- end
+ end