dspy 0.28.2 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41):
  1. checksums.yaml +4 -4
  2. data/README.md +2 -3
  3. data/lib/dspy/code_act.rb +14 -1
  4. data/lib/dspy/datasets/ade.rb +90 -0
  5. data/lib/dspy/datasets.rb +8 -0
  6. data/lib/dspy/lm.rb +4 -8
  7. data/lib/dspy/mixins/struct_builder.rb +17 -25
  8. data/lib/dspy/module.rb +12 -1
  9. data/lib/dspy/observability/async_span_processor.rb +67 -93
  10. data/lib/dspy/observability.rb +43 -1
  11. data/lib/dspy/predict.rb +10 -0
  12. data/lib/dspy/propose/dataset_summary_generator.rb +36 -3
  13. data/lib/dspy/propose/grounded_proposer.rb +118 -11
  14. data/lib/dspy/re_act.rb +13 -0
  15. data/lib/dspy/reflection_lm.rb +36 -0
  16. data/lib/dspy/teleprompt/gepa.rb +448 -2803
  17. data/lib/dspy/teleprompt/mipro_v2.rb +564 -65
  18. data/lib/dspy/teleprompt/utils.rb +8 -3
  19. data/lib/dspy/version.rb +2 -2
  20. data/lib/dspy.rb +3 -2
  21. data/lib/gepa/api.rb +61 -0
  22. data/lib/gepa/core/engine.rb +226 -0
  23. data/lib/gepa/core/evaluation_batch.rb +26 -0
  24. data/lib/gepa/core/result.rb +92 -0
  25. data/lib/gepa/core/state.rb +231 -0
  26. data/lib/gepa/logging/experiment_tracker.rb +54 -0
  27. data/lib/gepa/logging/logger.rb +57 -0
  28. data/lib/gepa/logging.rb +9 -0
  29. data/lib/gepa/proposer/base.rb +27 -0
  30. data/lib/gepa/proposer/merge_proposer.rb +424 -0
  31. data/lib/gepa/proposer/reflective_mutation/base.rb +48 -0
  32. data/lib/gepa/proposer/reflective_mutation/reflective_mutation.rb +188 -0
  33. data/lib/gepa/strategies/batch_sampler.rb +91 -0
  34. data/lib/gepa/strategies/candidate_selector.rb +97 -0
  35. data/lib/gepa/strategies/component_selector.rb +57 -0
  36. data/lib/gepa/strategies/instruction_proposal.rb +120 -0
  37. data/lib/gepa/telemetry.rb +122 -0
  38. data/lib/gepa/utils/pareto.rb +119 -0
  39. data/lib/gepa.rb +21 -0
  40. metadata +42 -4
  41. data/lib/dspy/teleprompt/simple_optimizer.rb +0 -503
@@ -1,7 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'digest'
4
+ require 'time'
5
+ require 'concurrent-ruby'
4
6
  require 'sorbet-runtime'
7
+ require 'securerandom'
5
8
  require_relative 'teleprompter'
6
9
  require_relative 'utils'
7
10
  require_relative '../propose/grounded_proposer'
@@ -124,6 +127,7 @@ module DSPy
124
127
  setting :track_diversity, default: true
125
128
  setting :max_errors, default: 3
126
129
  setting :num_threads, default: 1
130
+ setting :minibatch_size, default: nil
127
131
 
128
132
  # Class-level configuration method - sets defaults for new instances
129
133
  def self.configure(&block)
@@ -265,6 +269,7 @@ module DSPy
265
269
  @proposer = DSPy::Propose::GroundedProposer.new(config: DSPy::Propose::GroundedProposer::Config.new)
266
270
  @optimization_trace = []
267
271
  @evaluated_candidates = []
272
+ @trial_history = {}
268
273
  end
269
274
 
270
275
  # Main MIPROv2 optimization method
@@ -282,7 +287,7 @@ module DSPy
282
287
  trainset_size: trainset.size,
283
288
  valset_size: valset&.size || 0,
284
289
  num_trials: config.num_trials,
285
- optimization_strategy: config.optimization_strategy,
290
+ optimization_strategy: optimization_strategy_name,
286
291
  mode: infer_auto_mode
287
292
  }) do
288
293
  # Convert examples to typed format
@@ -331,6 +336,8 @@ module DSPy
331
336
  proposal_result
332
337
  )
333
338
 
339
+ @trial_history = optimization_result[:trial_logs] || {}
340
+
334
341
  save_results(final_result)
335
342
  final_result
336
343
  end
@@ -368,10 +375,6 @@ module DSPy
368
375
  # Flatten demo sets from first predictor and take first 5 examples
369
376
  few_shot_examples = demo_candidates[0]&.flatten&.take(5) || []
370
377
 
371
- # Get signature class from program
372
- signature_class = extract_signature_class(program)
373
- raise ArgumentError, "Cannot extract signature class from program" unless signature_class
374
-
375
378
  # Re-initialize proposer with program and trainset for awareness features
376
379
  # This enables program_aware and use_dataset_summary flags to work correctly
377
380
  proposer_config = DSPy::Propose::GroundedProposer::Config.new
@@ -383,11 +386,12 @@ module DSPy
383
386
  trainset: trainset
384
387
  )
385
388
 
386
- @proposer.propose_instructions(
387
- signature_class,
388
- trainset,
389
- few_shot_examples: few_shot_examples,
390
- current_instruction: current_instruction
389
+ @proposer.propose_instructions_for_program(
390
+ trainset: trainset,
391
+ program: program,
392
+ demo_candidates: demo_candidates,
393
+ trial_logs: @trial_history,
394
+ num_instruction_candidates: config.num_instruction_candidates
391
395
  )
392
396
  end
393
397
 
@@ -406,12 +410,18 @@ module DSPy
406
410
 
407
411
  # Initialize optimization state
408
412
  optimization_state = initialize_optimization_state(candidates)
409
-
413
+
414
+ # Initialize trial tracking structures
415
+ trial_logs = {}
416
+ param_score_dict = Hash.new { |hash, key| hash[key] = [] }
417
+ fully_evaled_param_combos = {}
418
+ total_eval_calls = 0
419
+
410
420
  # Run optimization trials
411
421
  trials_completed = 0
412
422
  best_score = 0.0
413
423
  best_candidate = nil
414
- best_program = nil
424
+ best_program = program
415
425
  best_evaluation_result = nil
416
426
 
417
427
  config.num_trials.times do |trial_idx|
@@ -419,6 +429,14 @@ module DSPy
419
429
 
420
430
  # Select next candidate based on optimization strategy
421
431
  candidate = select_next_candidate(candidates, optimization_state, trial_idx)
432
+ batch_size = evaluation_set.size
433
+
434
+ trial_logs[trials_completed] = create_trial_log_entry(
435
+ trial_number: trials_completed,
436
+ candidate: candidate,
437
+ evaluation_type: :full,
438
+ batch_size: batch_size
439
+ )
422
440
 
423
441
  emit_event('trial_start', {
424
442
  trial_number: trials_completed,
@@ -430,12 +448,30 @@ module DSPy
430
448
  begin
431
449
  # Evaluate candidate
432
450
  score, modified_program, evaluation_result = evaluate_candidate(program, candidate, evaluation_set)
451
+ total_eval_calls += batch_size
452
+
453
+ instructions_snapshot = extract_program_instructions(modified_program)
454
+ trial_logs[trials_completed][:instructions] = instructions_snapshot unless instructions_snapshot.empty?
455
+ trial_logs[trials_completed][:instruction] = instructions_snapshot[0] if instructions_snapshot.key?(0)
433
456
 
434
457
  # Update optimization state
435
458
  update_optimization_state(optimization_state, candidate, score)
459
+ record_param_score(
460
+ param_score_dict,
461
+ candidate,
462
+ score,
463
+ evaluation_type: :full,
464
+ instructions: instructions_snapshot
465
+ )
466
+ update_fully_evaled_param_combos(
467
+ fully_evaled_param_combos,
468
+ candidate,
469
+ score,
470
+ instructions: instructions_snapshot
471
+ )
436
472
 
437
473
  # Track best result
438
- is_best = score > best_score
474
+ is_best = best_candidate.nil? || score > best_score
439
475
  if is_best
440
476
  best_score = score
441
477
  best_candidate = candidate
@@ -443,6 +479,15 @@ module DSPy
443
479
  best_evaluation_result = evaluation_result
444
480
  end
445
481
 
482
+ finalize_trial_log_entry(
483
+ trial_logs,
484
+ trials_completed,
485
+ score: score,
486
+ evaluation_type: :full,
487
+ batch_size: batch_size,
488
+ total_eval_calls: total_eval_calls
489
+ )
490
+
446
491
  emit_event('trial_complete', {
447
492
  trial_number: trials_completed,
448
493
  score: score,
@@ -457,6 +502,16 @@ module DSPy
457
502
  end
458
503
 
459
504
  rescue => error
505
+ finalize_trial_log_entry(
506
+ trial_logs,
507
+ trials_completed,
508
+ score: nil,
509
+ evaluation_type: :full,
510
+ batch_size: batch_size,
511
+ total_eval_calls: total_eval_calls,
512
+ error: error.message
513
+ )
514
+
460
515
  emit_event('trial_error', {
461
516
  trial_number: trials_completed,
462
517
  error: error.message,
@@ -474,7 +529,11 @@ module DSPy
474
529
  best_evaluation_result: best_evaluation_result,
475
530
  trials_completed: trials_completed,
476
531
  optimization_state: optimization_state,
477
- evaluated_candidates: @evaluated_candidates
532
+ evaluated_candidates: @evaluated_candidates,
533
+ trial_logs: trial_logs,
534
+ param_score_dict: param_score_dict,
535
+ fully_evaled_param_combos: fully_evaled_param_combos,
536
+ total_eval_calls: total_eval_calls
478
537
  }
479
538
  end
480
539
 
@@ -488,61 +547,172 @@ module DSPy
488
547
  def generate_candidate_configurations(proposal_result, demo_candidates)
489
548
  candidates = []
490
549
 
550
+ predictor_instruction_map = if proposal_result.respond_to?(:predictor_instructions) && proposal_result.predictor_instructions.any?
551
+ proposal_result.predictor_instructions
552
+ else
553
+ { 0 => proposal_result.candidate_instructions }
554
+ end
555
+
556
+ instruction_maps = build_instruction_maps(predictor_instruction_map)
557
+ demo_maps = build_demo_maps(demo_candidates)
558
+
491
559
  # Base configuration (no modifications)
492
560
  candidates << EvaluatedCandidate.new(
493
561
  instruction: "",
494
562
  few_shot_examples: [],
495
563
  type: CandidateType::Baseline,
496
- metadata: {},
564
+ metadata: {
565
+ instructions_map: {},
566
+ demos_map: {}
567
+ },
497
568
  config_id: SecureRandom.hex(6)
498
569
  )
499
570
 
500
- # Instruction-only candidates
501
- proposal_result.candidate_instructions.each_with_index do |instruction, idx|
571
+ instruction_maps.each_with_index do |instruction_map, combo_idx|
572
+ primary_instruction = instruction_map[0] || instruction_map.values.first || ""
502
573
  candidates << EvaluatedCandidate.new(
503
- instruction: instruction,
574
+ instruction: primary_instruction,
504
575
  few_shot_examples: [],
505
576
  type: CandidateType::InstructionOnly,
506
- metadata: { proposal_rank: idx },
577
+ metadata: {
578
+ proposal_rank: combo_idx,
579
+ instructions_map: duplicate_instruction_map(instruction_map),
580
+ demos_map: {}
581
+ },
507
582
  config_id: SecureRandom.hex(6)
508
583
  )
509
584
  end
510
585
 
511
- # Few-shot only candidates
512
- # Extract demo sets from first predictor (predictor index 0)
513
- demo_sets = demo_candidates[0] || []
514
- demo_sets.each_with_index do |demo_set, idx|
586
+ demo_maps.each_with_index do |demo_map, idx|
587
+ next if demo_map.empty?
588
+
589
+ flattened_examples = demo_map.values.flatten
515
590
  candidates << EvaluatedCandidate.new(
516
591
  instruction: "",
517
- few_shot_examples: demo_set,
592
+ few_shot_examples: flattened_examples,
518
593
  type: CandidateType::FewShotOnly,
519
- metadata: { bootstrap_rank: idx },
594
+ metadata: {
595
+ bootstrap_rank: idx,
596
+ instructions_map: {},
597
+ demos_map: duplicate_demo_map(demo_map)
598
+ },
520
599
  config_id: SecureRandom.hex(6)
521
600
  )
522
601
  end
523
602
 
524
603
  # Combined candidates (instruction + few-shot)
525
- top_instructions = proposal_result.candidate_instructions.take(3)
526
- top_bootstrap_sets = demo_sets.take(3)
527
-
528
- top_instructions.each_with_index do |instruction, i_idx|
529
- top_bootstrap_sets.each_with_index do |candidate_set, b_idx|
604
+ instruction_maps.each_with_index do |instruction_map, combo_idx|
605
+ primary_instruction = instruction_map[0] || instruction_map.values.first || ""
606
+ demo_maps.first(3).each_with_index do |demo_map, demo_idx|
607
+ next if demo_map.empty?
608
+
609
+ flattened_examples = demo_map.values.flatten
530
610
  candidates << EvaluatedCandidate.new(
531
- instruction: instruction,
532
- few_shot_examples: candidate_set,
611
+ instruction: primary_instruction,
612
+ few_shot_examples: flattened_examples,
533
613
  type: CandidateType::Combined,
534
- metadata: {
535
- instruction_rank: i_idx,
536
- bootstrap_rank: b_idx
614
+ metadata: {
615
+ instruction_rank: combo_idx,
616
+ bootstrap_rank: demo_idx,
617
+ instructions_map: duplicate_instruction_map(instruction_map),
618
+ demos_map: duplicate_demo_map(demo_map)
537
619
  },
538
620
  config_id: SecureRandom.hex(6)
539
621
  )
540
622
  end
541
623
  end
542
-
624
+
543
625
  candidates
544
626
  end
545
627
 
628
+ sig { params(predictor_instruction_map: T::Hash[Integer, T::Array[String]]).returns(T::Array[T::Hash[Integer, String]]) }
629
+ def build_instruction_maps(predictor_instruction_map)
630
+ return [{}] if predictor_instruction_map.nil? || predictor_instruction_map.empty?
631
+
632
+ normalized = predictor_instruction_map.each_with_object({}) do |(index, instructions), memo|
633
+ next if instructions.nil? || instructions.empty?
634
+ memo[index] = instructions.take(3)
635
+ end
636
+
637
+ return [{}] if normalized.empty?
638
+
639
+ cartesian_product(normalized)
640
+ end
641
+
642
+ sig do
643
+ params(demo_candidates: T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]]).returns(T::Array[T::Hash[Integer, T::Array[DSPy::FewShotExample]]])
644
+ end
645
+ def build_demo_maps(demo_candidates)
646
+ return [{}] if demo_candidates.nil? || demo_candidates.empty?
647
+
648
+ normalized = demo_candidates.each_with_object({}) do |(index, sets), memo|
649
+ next if sets.nil? || sets.empty?
650
+ memo[index] = sets.take(3)
651
+ end
652
+
653
+ return [{}] if normalized.empty?
654
+
655
+ cartesian_product(normalized)
656
+ end
657
+
658
+ sig do
659
+ params(options_hash: T::Hash[Integer, T::Array[T.untyped]]).returns(T::Array[T::Hash[Integer, T.untyped]])
660
+ end
661
+ def cartesian_product(options_hash)
662
+ options_hash.sort_by { |index, _| index }.reduce([{}]) do |acc, (index, values)|
663
+ next acc if values.nil? || values.empty?
664
+
665
+ acc.flat_map do |existing|
666
+ values.map do |value|
667
+ existing.merge(index => value)
668
+ end
669
+ end
670
+ end
671
+ end
672
+
673
+ sig { params(instruction_map: T::Hash[Integer, String]).returns(T::Hash[Integer, String]) }
674
+ def duplicate_instruction_map(instruction_map)
675
+ instruction_map.each_with_object({}) do |(index, instruction), memo|
676
+ memo[index] = instruction.is_a?(String) ? instruction.dup : instruction
677
+ end
678
+ end
679
+
680
+ sig do
681
+ params(demo_map: T::Hash[Integer, T::Array[DSPy::FewShotExample]]).returns(T::Hash[Integer, T::Array[DSPy::FewShotExample]])
682
+ end
683
+ def duplicate_demo_map(demo_map)
684
+ demo_map.each_with_object({}) do |(index, demos), memo|
685
+ next if demos.nil?
686
+ memo[index] = demos.map { |demo| demo }
687
+ end
688
+ end
689
+
690
+ sig { params(examples: T::Array[T.untyped]).returns(T::Array[DSPy::FewShotExample]) }
691
+ def normalize_few_shot_examples(examples)
692
+ examples.map do |example|
693
+ if example.is_a?(DSPy::FewShotExample)
694
+ example
695
+ elsif example.is_a?(DSPy::Example)
696
+ DSPy::FewShotExample.new(
697
+ input: example.input_values,
698
+ output: example.expected_values,
699
+ reasoning: extract_reasoning_from_example(example)
700
+ )
701
+ else
702
+ example
703
+ end
704
+ end
705
+ end
706
+
707
+ sig { params(predictor: T.untyped, examples: T::Array[DSPy::FewShotExample]).void }
708
+ def assign_predictor_examples(predictor, examples)
709
+ predictor.demos = examples if predictor.respond_to?(:demos=)
710
+ return unless predictor.respond_to?(:prompt)
711
+
712
+ cloned_examples = examples.map { |ex| ex }
713
+ predictor.prompt.instance_variable_set(:@few_shot_examples, cloned_examples.freeze)
714
+ end
715
+
546
716
  # Initialize optimization state for candidate selection
547
717
  sig { params(candidates: T::Array[EvaluatedCandidate]).returns(T::Hash[Symbol, T.untyped]) }
548
718
  def initialize_optimization_state(candidates)
@@ -722,7 +892,11 @@ module DSPy
722
892
  modified_program = apply_candidate_configuration(program, candidate)
723
893
 
724
894
  # Evaluate modified program
725
- evaluation_result = evaluate_program(modified_program, evaluation_set)
895
+ evaluation_result = if use_concurrent_evaluation?(evaluation_set)
896
+ evaluate_candidate_concurrently(modified_program, evaluation_set)
897
+ else
898
+ evaluate_program(modified_program, evaluation_set)
899
+ end
726
900
 
727
901
  # Store evaluation details
728
902
  @evaluated_candidates << candidate
@@ -730,32 +904,131 @@ module DSPy
730
904
  [evaluation_result.pass_rate, modified_program, evaluation_result]
731
905
  end
732
906
 
907
+ sig { params(evaluation_set: T::Array[DSPy::Example]).returns(T::Boolean) }
908
+ def use_concurrent_evaluation?(evaluation_set)
909
+ minibatch_size = config.minibatch_size
910
+ return false unless minibatch_size&.positive?
911
+ return false unless config.num_threads && config.num_threads > 1
912
+
913
+ evaluation_set.size > minibatch_size
914
+ end
915
+
916
+ sig do
917
+ params(
918
+ modified_program: T.untyped,
919
+ evaluation_set: T::Array[DSPy::Example]
920
+ ).returns(DSPy::Evaluate::BatchEvaluationResult)
921
+ end
922
+ def evaluate_candidate_concurrently(modified_program, evaluation_set)
923
+ chunk_size = T.must(config.minibatch_size)
924
+ chunks = evaluation_set.each_slice(chunk_size).map(&:dup)
925
+ return evaluate_program(modified_program, evaluation_set) if chunks.size <= 1
926
+
927
+ pool_size = [config.num_threads, chunks.size].min
928
+ pool_size = 1 if pool_size <= 0
929
+ executor = Concurrent::FixedThreadPool.new(pool_size)
930
+
931
+ futures = chunks.map do |chunk|
932
+ Concurrent::Promises.future_on(executor) do
933
+ evaluate_program(modified_program, chunk)
934
+ end
935
+ end
936
+
937
+ results = futures.map(&:value!)
938
+ combine_batch_results(results)
939
+ ensure
940
+ if executor
941
+ executor.shutdown
942
+ executor.wait_for_termination
943
+ end
944
+ end
945
+
946
+ sig do
947
+ params(batch_results: T::Array[DSPy::Evaluate::BatchEvaluationResult]).returns(DSPy::Evaluate::BatchEvaluationResult)
948
+ end
949
+ def combine_batch_results(batch_results)
950
+ return DSPy::Evaluate::BatchEvaluationResult.new(results: [], aggregated_metrics: {}) if batch_results.empty?
951
+
952
+ combined_results = batch_results.flat_map(&:results)
953
+ total_examples = batch_results.sum(&:total_examples)
954
+ aggregated_metrics = merge_aggregated_metrics(batch_results, total_examples)
955
+
956
+ DSPy::Evaluate::BatchEvaluationResult.new(
957
+ results: combined_results,
958
+ aggregated_metrics: aggregated_metrics
959
+ )
960
+ end
961
+
962
+ sig do
963
+ params(
964
+ batch_results: T::Array[DSPy::Evaluate::BatchEvaluationResult],
965
+ total_examples: Integer
966
+ ).returns(T::Hash[Symbol, T.untyped])
967
+ end
968
+ def merge_aggregated_metrics(batch_results, total_examples)
969
+ return {} if total_examples.zero?
970
+
971
+ keys = batch_results.flat_map { |res| res.aggregated_metrics.keys }.uniq
972
+ keys.each_with_object({}) do |key, memo|
973
+ numeric_weight = 0.0
974
+ numeric_sum = 0.0
975
+ fallback_value = nil
976
+
977
+ batch_results.each do |res|
978
+ value = res.aggregated_metrics[key]
979
+ next if value.nil?
980
+
981
+ if value.is_a?(Numeric)
982
+ numeric_sum += value.to_f * res.total_examples
983
+ numeric_weight += res.total_examples
984
+ else
985
+ fallback_value = value
986
+ end
987
+ end
988
+
989
+ if numeric_weight.positive?
990
+ memo[key] = numeric_sum / numeric_weight
991
+ elsif fallback_value
992
+ memo[key] = fallback_value
993
+ end
994
+ end
995
+ end
996
+
733
997
  # Apply candidate configuration to program
734
998
  sig { params(program: T.untyped, candidate: EvaluatedCandidate).returns(T.untyped) }
735
999
  def apply_candidate_configuration(program, candidate)
1000
+ instructions_map = candidate.metadata[:instructions_map] || {}
1001
+ demos_map = candidate.metadata[:demos_map] || {}
1002
+
736
1003
  modified_program = program
737
-
738
- # Apply instruction if provided
739
- if !candidate.instruction.empty? && program.respond_to?(:with_instruction)
740
- modified_program = modified_program.with_instruction(candidate.instruction)
741
- end
742
-
743
- # Apply few-shot examples if provided
744
- if candidate.few_shot_examples.any? && program.respond_to?(:with_examples)
745
- few_shot_examples = candidate.few_shot_examples.map do |example|
746
- # If already a FewShotExample, use it directly
747
- if example.is_a?(DSPy::FewShotExample)
748
- example
749
- else
750
- # Convert from DSPy::Example
751
- DSPy::FewShotExample.new(
752
- input: example.input_values,
753
- output: example.expected_values,
754
- reasoning: extract_reasoning_from_example(example)
755
- )
1004
+ if modified_program.respond_to?(:predictors) && (instructions_map.any? || demos_map.any?)
1005
+ modified_program = modified_program.clone
1006
+ modified_program.predictors.each_with_index do |predictor, idx|
1007
+ if instructions_map.key?(idx)
1008
+ signature = Utils.get_signature(predictor)
1009
+ updated_signature = signature.with_instructions(instructions_map[idx])
1010
+ Utils.set_signature(predictor, updated_signature)
1011
+ end
1012
+
1013
+ if demos_map.key?(idx)
1014
+ normalized_examples = normalize_few_shot_examples(demos_map[idx])
1015
+ assign_predictor_examples(predictor, normalized_examples)
756
1016
  end
757
1017
  end
758
- modified_program = modified_program.with_examples(few_shot_examples)
1018
+ end
1019
+
1020
+ # Apply instruction if provided (top-level programs still respect with_instruction)
1021
+ if !candidate.instruction.empty? && modified_program.respond_to?(:with_instruction)
1022
+ modified_program = modified_program.with_instruction(candidate.instruction)
1023
+ end
1024
+
1025
+ should_apply_global_examples = candidate.few_shot_examples.any? &&
1026
+ modified_program.respond_to?(:with_examples) &&
1027
+ (demos_map.empty? || !modified_program.respond_to?(:predictors))
1028
+
1029
+ if should_apply_global_examples
1030
+ normalized_few_shot = normalize_few_shot_examples(candidate.few_shot_examples)
1031
+ modified_program = modified_program.with_examples(normalized_few_shot)
759
1032
  end
760
1033
 
761
1034
  modified_program
@@ -824,14 +1097,16 @@ module DSPy
824
1097
 
825
1098
  history = {
826
1099
  total_trials: optimization_result[:trials_completed],
827
- optimization_strategy: config.optimization_strategy,
1100
+ optimization_strategy: optimization_strategy_name,
828
1101
  early_stopped: optimization_result[:trials_completed] < config.num_trials,
829
- score_history: optimization_result[:optimization_state][:best_score_history]
1102
+ score_history: optimization_result[:optimization_state][:best_score_history],
1103
+ total_eval_calls: optimization_result[:total_eval_calls]
830
1104
  }
831
1105
 
832
1106
  metadata = {
833
1107
  optimizer: "MIPROv2",
834
1108
  auto_mode: infer_auto_mode,
1109
+ optimization_strategy: optimization_strategy_name,
835
1110
  best_instruction: best_candidate&.instruction || "",
836
1111
  best_few_shot_count: best_candidate&.few_shot_examples&.size || 0,
837
1112
  best_candidate_type: best_candidate&.type&.serialize || "unknown",
@@ -839,12 +1114,21 @@ module DSPy
839
1114
  }
840
1115
 
841
1116
  # Create bootstrap statistics from demo_candidates
842
- demo_sets = demo_candidates[0] || []
1117
+ num_predictors = demo_candidates.keys.size
1118
+ sets_per_predictor = demo_candidates.values.map(&:size)
1119
+ all_demo_sets = demo_candidates.values.flat_map { |sets| sets }
843
1120
  bootstrap_statistics = {
844
- num_predictors: demo_candidates.keys.size,
845
- demo_sets_per_predictor: demo_sets.size,
846
- avg_demos_per_set: demo_sets.empty? ? 0 : demo_sets.map(&:size).sum.to_f / demo_sets.size
1121
+ num_predictors: num_predictors,
1122
+ demo_sets_per_predictor: sets_per_predictor.max || 0,
1123
+ avg_demos_per_set: all_demo_sets.empty? ? 0 : all_demo_sets.map(&:size).sum.to_f / all_demo_sets.size
847
1124
  }
1125
+ bootstrap_statistics[:per_predictor_demo_counts] = sets_per_predictor if sets_per_predictor.any?
1126
+
1127
+ optimization_trace = serialize_optimization_trace(optimization_result[:optimization_state])
1128
+ optimization_trace[:trial_logs] = serialize_trial_logs(optimization_result[:trial_logs])
1129
+ optimization_trace[:param_score_dict] = serialize_param_score_dict(optimization_result[:param_score_dict])
1130
+ optimization_trace[:fully_evaled_param_combos] = serialize_fully_evaled_param_combos(optimization_result[:fully_evaled_param_combos])
1131
+ optimization_trace[:total_eval_calls] = optimization_result[:total_eval_calls]
848
1132
 
849
1133
  MIPROv2Result.new(
850
1134
  optimized_program: best_program,
@@ -854,7 +1138,7 @@ module DSPy
854
1138
  best_score_value: best_score,
855
1139
  metadata: metadata,
856
1140
  evaluated_candidates: @evaluated_candidates,
857
- optimization_trace: serialize_optimization_trace(optimization_result[:optimization_state]),
1141
+ optimization_trace: optimization_trace,
858
1142
  bootstrap_statistics: bootstrap_statistics,
859
1143
  proposal_statistics: proposal_result.analysis,
860
1144
  best_evaluation_result: best_evaluation_result
@@ -876,7 +1160,205 @@ module DSPy
876
1160
  serialized_trace
877
1161
  end
878
1162
 
1163
+ sig do
1164
+ params(
1165
+ trial_number: Integer,
1166
+ candidate: EvaluatedCandidate,
1167
+ evaluation_type: Symbol,
1168
+ batch_size: Integer
1169
+ ).returns(T::Hash[Symbol, T.untyped])
1170
+ end
1171
+ def create_trial_log_entry(trial_number:, candidate:, evaluation_type:, batch_size:)
1172
+ # Preserve interface parity with Python implementation (trial number stored implicitly via hash key)
1173
+ trial_number # no-op to acknowledge parameter usage
1174
+ instructions_map = candidate.metadata[:instructions_map] || {}
1175
+ demos_map = candidate.metadata[:demos_map] || {}
1176
+ entry = {
1177
+ candidate_id: candidate.config_id,
1178
+ candidate_type: candidate.type.serialize,
1179
+ instruction_preview: candidate.instruction.to_s[0, 160],
1180
+ few_shot_count: candidate.few_shot_examples.size,
1181
+ metadata: deep_dup(candidate.metadata),
1182
+ evaluation_type: evaluation_type,
1183
+ batch_size: batch_size,
1184
+ status: :in_progress,
1185
+ started_at: Time.now.iso8601
1186
+ }
1187
+ if instructions_map.any?
1188
+ entry[:instructions] = duplicate_instruction_map(instructions_map)
1189
+ entry[:instruction] = entry[:instructions][0] if entry[:instructions].key?(0)
1190
+ elsif candidate.instruction && !candidate.instruction.empty?
1191
+ predictor_index = candidate.metadata[:predictor_index] || 0
1192
+ entry[:instruction] = candidate.instruction
1193
+ entry[:instructions] = { predictor_index => candidate.instruction }
1194
+ end
1195
+ entry[:few_shot_map] = duplicate_demo_map(demos_map) if demos_map.any?
1196
+ entry
1197
+ end
1198
+
1199
+ sig do
1200
+ params(
1201
+ trial_logs: T::Hash[Integer, T::Hash[Symbol, T.untyped]],
1202
+ trial_number: Integer,
1203
+ score: T.nilable(Float),
1204
+ evaluation_type: Symbol,
1205
+ batch_size: Integer,
1206
+ total_eval_calls: Integer,
1207
+ error: T.nilable(String)
1208
+ ).void
1209
+ end
1210
+ def finalize_trial_log_entry(trial_logs, trial_number, score:, evaluation_type:, batch_size:, total_eval_calls:, error: nil)
1211
+ entry = trial_logs[trial_number] || {}
1212
+ entry[:score] = score if score
1213
+ entry[:evaluation_type] = evaluation_type
1214
+ entry[:batch_size] = batch_size
1215
+ entry[:total_eval_calls] = total_eval_calls
1216
+ entry[:status] = error ? :error : :completed
1217
+ entry[:error] = error if error
1218
+ entry[:completed_at] = Time.now.iso8601
1219
+ trial_logs[trial_number] = entry
1220
+ end
1221
+
1222
+ sig do
1223
+ params(
1224
+ param_score_dict: T::Hash[String, T::Array[T::Hash[Symbol, T.untyped]]],
1225
+ candidate: EvaluatedCandidate,
1226
+ score: Float,
1227
+ evaluation_type: Symbol,
1228
+ instructions: T.nilable(T::Hash[Integer, String])
1229
+ ).void
1230
+ end
1231
+ def record_param_score(param_score_dict, candidate, score, evaluation_type:, instructions: nil)
1232
+ instructions_hash = instructions || {}
1233
+ if instructions_hash.empty? && candidate.instruction && !candidate.instruction.empty?
1234
+ predictor_index = candidate.metadata[:predictor_index] || 0
1235
+ instructions_hash[predictor_index] = candidate.instruction
1236
+ end
1237
+
1238
+ record = {
1239
+ candidate_id: candidate.config_id,
1240
+ candidate_type: candidate.type.serialize,
1241
+ score: score,
1242
+ evaluation_type: evaluation_type,
1243
+ timestamp: Time.now.iso8601,
1244
+ metadata: deep_dup(candidate.metadata)
1245
+ }
1246
+ primary_instruction = instructions_hash[0] || candidate.instruction
1247
+ record[:instruction] = primary_instruction if primary_instruction && !primary_instruction.empty?
1248
+ record[:instructions] = instructions_hash unless instructions_hash.empty?
1249
+
1250
+ param_score_dict[candidate.config_id] << record
1251
+ end
1252
+
1253
+ sig do
1254
+ params(
1255
+ fully_evaled_param_combos: T::Hash[String, T::Hash[Symbol, T.untyped]],
1256
+ candidate: EvaluatedCandidate,
1257
+ score: Float,
1258
+ instructions: T.nilable(T::Hash[Integer, String])
1259
+ ).void
1260
+ end
1261
+ def update_fully_evaled_param_combos(fully_evaled_param_combos, candidate, score, instructions: nil)
1262
+ existing = fully_evaled_param_combos[candidate.config_id]
1263
+ if existing.nil? || score > existing[:score]
1264
+ instructions_hash = instructions || {}
1265
+ if instructions_hash.empty? && candidate.instruction && !candidate.instruction.empty?
1266
+ predictor_index = candidate.metadata[:predictor_index] || 0
1267
+ instructions_hash[predictor_index] = candidate.instruction
1268
+ end
1269
+
1270
+ fully_evaled_param_combos[candidate.config_id] = {
1271
+ candidate_id: candidate.config_id,
1272
+ candidate_type: candidate.type.serialize,
1273
+ score: score,
1274
+ metadata: deep_dup(candidate.metadata),
1275
+ updated_at: Time.now.iso8601
1276
+ }
1277
+ unless instructions_hash.empty?
1278
+ fully_evaled_param_combos[candidate.config_id][:instructions] = instructions_hash
1279
+ fully_evaled_param_combos[candidate.config_id][:instruction] = instructions_hash[0] || candidate.instruction
1280
+ end
1281
+ end
1282
+ end
1283
+
1284
+ sig { params(trial_logs: T.nilable(T::Hash[Integer, T::Hash[Symbol, T.untyped]])).returns(T::Hash[Integer, T::Hash[Symbol, T.untyped]]) }
1285
+ def serialize_trial_logs(trial_logs)
1286
+ return {} unless trial_logs
1287
+
1288
+ allowed_keys = [
1289
+ :candidate_id,
1290
+ :candidate_type,
1291
+ :instruction_preview,
1292
+ :instruction,
1293
+ :instructions,
1294
+ :few_shot_count,
1295
+ :metadata,
1296
+ :evaluation_type,
1297
+ :batch_size,
1298
+ :score,
1299
+ :status,
1300
+ :error,
1301
+ :started_at,
1302
+ :completed_at,
1303
+ :total_eval_calls
1304
+ ]
1305
+
1306
+ trial_logs.transform_values do |entry|
1307
+ entry.each_with_object({}) do |(key, value), memo|
1308
+ memo[key] = value if allowed_keys.include?(key)
1309
+ end
1310
+ end
1311
+ end
1312
+
1313
+ sig { params(param_score_dict: T.nilable(T::Hash[String, T::Array[T::Hash[Symbol, T.untyped]]])).returns(T::Hash[String, T::Array[T::Hash[Symbol, T.untyped]]]) }
1314
+ def serialize_param_score_dict(param_score_dict)
1315
+ return {} unless param_score_dict
1316
+
1317
+ allowed_keys = [:candidate_id, :candidate_type, :score, :evaluation_type, :timestamp, :metadata, :instruction, :instructions]
1318
+
1319
+ param_score_dict.transform_values do |records|
1320
+ records.map do |record|
1321
+ record.each_with_object({}) do |(key, value), memo|
1322
+ memo[key] = value if allowed_keys.include?(key)
1323
+ end
1324
+ end
1325
+ end
1326
+ end
1327
+
1328
+ sig { params(fully_evaled_param_combos: T.nilable(T::Hash[String, T::Hash[Symbol, T.untyped]])).returns(T::Hash[String, T::Hash[Symbol, T.untyped]]) }
1329
+ def serialize_fully_evaled_param_combos(fully_evaled_param_combos)
1330
+ return {} unless fully_evaled_param_combos
1331
+
1332
+ allowed_keys = [:candidate_id, :candidate_type, :score, :metadata, :updated_at, :instruction, :instructions]
1333
+
1334
+ fully_evaled_param_combos.transform_values do |record|
1335
+ record.each_with_object({}) do |(key, value), memo|
1336
+ memo[key] = value if allowed_keys.include?(key)
1337
+ end
1338
+ end
1339
+ end
1340
+
1341
+ sig { params(value: T.untyped).returns(T.untyped) }
1342
+ def deep_dup(value)
1343
+ case value
1344
+ when Hash
1345
+ value.each_with_object({}) { |(k, v), memo| memo[k] = deep_dup(v) }
1346
+ when Array
1347
+ value.map { |element| deep_dup(element) }
1348
+ else
1349
+ value
1350
+ end
1351
+ end
1352
+
879
1353
  # Helper methods
1354
+ sig { returns(String) }
1355
+ def optimization_strategy_name
1356
+ strategy = config.optimization_strategy
1357
+ return strategy.serialize if strategy.respond_to?(:serialize)
1358
+
1359
+ strategy.to_s
1360
+ end
1361
+
880
1362
  sig { params(program: T.untyped).returns(T.nilable(String)) }
881
1363
  def extract_current_instruction(program)
882
1364
  if program.respond_to?(:prompt) && program.prompt.respond_to?(:instruction)
@@ -889,6 +1371,23 @@ module DSPy
889
1371
  end
890
1372
  end
891
1373
 
1374
+ sig { params(program: T.untyped).returns(T::Hash[Integer, String]) }
1375
+ def extract_program_instructions(program)
1376
+ instructions = {}
1377
+ if program.respond_to?(:predictors)
1378
+ program.predictors.each_with_index do |predictor, index|
1379
+ if predictor.respond_to?(:prompt) && predictor.prompt.respond_to?(:instruction)
1380
+ value = predictor.prompt.instruction
1381
+ instructions[index] = value if value
1382
+ end
1383
+ end
1384
+ else
1385
+ fallback_instruction = extract_current_instruction(program)
1386
+ instructions[0] = fallback_instruction if fallback_instruction
1387
+ end
1388
+ instructions
1389
+ end
1390
+
892
1391
  sig { params(program: T.untyped).returns(T.nilable(T.class_of(DSPy::Signature))) }
893
1392
  def extract_signature_class(program)
894
1393
  program.respond_to?(:signature_class) ? program.signature_class : nil
@@ -921,4 +1420,4 @@ module DSPy
921
1420
  end
922
1421
  end
923
1422
  end
924
- end
1423
+ end