dspy 0.28.1 → 0.29.0

This diff compares the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/README.md +2 -3
  3. data/lib/dspy/callbacks.rb +222 -0
  4. data/lib/dspy/chain_of_thought.rb +2 -1
  5. data/lib/dspy/code_act.rb +14 -1
  6. data/lib/dspy/datasets/ade.rb +90 -0
  7. data/lib/dspy/datasets.rb +8 -0
  8. data/lib/dspy/lm.rb +9 -12
  9. data/lib/dspy/mixins/struct_builder.rb +17 -25
  10. data/lib/dspy/module.rb +45 -1
  11. data/lib/dspy/observability/async_span_processor.rb +67 -93
  12. data/lib/dspy/observability.rb +43 -1
  13. data/lib/dspy/predict.rb +17 -0
  14. data/lib/dspy/prompt.rb +90 -20
  15. data/lib/dspy/propose/dataset_summary_generator.rb +210 -0
  16. data/lib/dspy/propose/grounded_proposer.rb +320 -66
  17. data/lib/dspy/re_act.rb +13 -0
  18. data/lib/dspy/reflection_lm.rb +36 -0
  19. data/lib/dspy/teleprompt/bootstrap_strategy.rb +26 -0
  20. data/lib/dspy/teleprompt/gepa.rb +448 -2803
  21. data/lib/dspy/teleprompt/mipro_v2.rb +624 -100
  22. data/lib/dspy/teleprompt/utils.rb +349 -42
  23. data/lib/dspy/version.rb +2 -2
  24. data/lib/dspy.rb +4 -2
  25. data/lib/gepa/api.rb +61 -0
  26. data/lib/gepa/core/engine.rb +226 -0
  27. data/lib/gepa/core/evaluation_batch.rb +26 -0
  28. data/lib/gepa/core/result.rb +92 -0
  29. data/lib/gepa/core/state.rb +231 -0
  30. data/lib/gepa/logging/experiment_tracker.rb +54 -0
  31. data/lib/gepa/logging/logger.rb +57 -0
  32. data/lib/gepa/logging.rb +9 -0
  33. data/lib/gepa/proposer/base.rb +27 -0
  34. data/lib/gepa/proposer/merge_proposer.rb +424 -0
  35. data/lib/gepa/proposer/reflective_mutation/base.rb +48 -0
  36. data/lib/gepa/proposer/reflective_mutation/reflective_mutation.rb +188 -0
  37. data/lib/gepa/strategies/batch_sampler.rb +91 -0
  38. data/lib/gepa/strategies/candidate_selector.rb +97 -0
  39. data/lib/gepa/strategies/component_selector.rb +57 -0
  40. data/lib/gepa/strategies/instruction_proposal.rb +120 -0
  41. data/lib/gepa/telemetry.rb +122 -0
  42. data/lib/gepa/utils/pareto.rb +119 -0
  43. data/lib/gepa.rb +21 -0
  44. metadata +59 -4
  45. data/lib/dspy/teleprompt/simple_optimizer.rb +0 -497
data/lib/dspy/teleprompt/utils.rb CHANGED
@@ -1,6 +1,7 @@
  # frozen_string_literal: true
 
  require 'sorbet-runtime'
+ require 'fileutils'
  require_relative '../evaluate'
  require_relative '../example'
  require_relative 'data_handler'
@@ -12,6 +13,167 @@ module DSPy
      module Utils
        extend T::Sig
 
+       # Wrapper class that provides Python-compatible signature API
+       # Wraps a Predict instance to provide signature access and modification
+       class SignatureWrapper
+         extend T::Sig
+
+         sig { returns(T.untyped) }
+         attr_reader :predictor
+
+         sig { params(predictor: T.untyped).void }
+         def initialize(predictor)
+           @predictor = predictor
+         end
+
+         sig { returns(String) }
+         def instructions
+           # Get instructions from the predictor's prompt
+           @predictor.prompt.instruction
+         end
+
+         sig { params(new_instructions: String).returns(SignatureWrapper) }
+         def with_instructions(new_instructions)
+           # Return a new wrapper that will apply new instructions when set
+           updated_wrapper = SignatureWrapper.new(@predictor)
+           updated_wrapper.instance_variable_set(:@pending_instructions, new_instructions)
+           updated_wrapper
+         end
+
+         sig { returns(T.nilable(String)) }
+         def pending_instructions
+           @pending_instructions
+         end
+       end
+
+       # Get signature information from a predictor (Python compatibility)
+       # Returns a wrapper that provides Python-like signature API
+       #
+       # @param predictor [Predict] The predictor to get signature from
+       # @return [SignatureWrapper] Wrapper providing signature access
+       sig { params(predictor: T.untyped).returns(SignatureWrapper) }
+       def self.get_signature(predictor)
+         SignatureWrapper.new(predictor)
+       end
+
+       # Set signature on a predictor (Python compatibility)
+       # Updates the predictor's prompt with new instructions
+       #
+       # @param predictor [Predict] The predictor to update
+       # @param updated_signature [SignatureWrapper] The updated signature wrapper
+       sig { params(predictor: T.untyped, updated_signature: SignatureWrapper).void }
+       def self.set_signature(predictor, updated_signature)
+         # Extract pending instructions from the wrapper
+         new_instructions = updated_signature.pending_instructions
+
+         if new_instructions
+           # Update the predictor's prompt with new instructions
+           # We mutate the prompt's instruction directly for MIPROv2 compatibility
+           predictor.prompt.instance_variable_set(:@instruction, new_instructions)
+         end
+       end
+
+       # Create a minibatch from the trainset using random sampling
+       # This function is compatible with Python DSPy's MIPROv2 implementation
+       #
+       # @param trainset [Array] The training dataset to sample from
+       # @param batch_size [Integer] The desired size of the minibatch (default: 50)
+       # @param rng [Random, nil] Optional random number generator for reproducible sampling
+       # @return [Array] A randomly sampled subset of the trainset
+       sig do
+         params(
+           trainset: T::Array[T.untyped],
+           batch_size: Integer,
+           rng: T.nilable(Random)
+         ).returns(T::Array[T.untyped])
+       end
+       def self.create_minibatch(trainset, batch_size = 50, rng = nil)
+         # Ensure batch_size isn't larger than the size of the dataset
+         actual_batch_size = [batch_size, trainset.size].min
+
+         # Randomly sample from trainset
+         # If RNG is provided, use it for reproducible sampling
+         if rng
+           trainset.sample(actual_batch_size, random: rng)
+         else
+           trainset.sample(actual_batch_size)
+         end
+       end
+
+       # Get program with highest average score from minibatch trials
+       # Used as a helper function for Bayesian + minibatching optimizers
+       #
+       # @param param_score_dict [Hash] Maps combo keys to arrays of [score, program, params] tuples
+       # @param fully_evaled_param_combos [Array] List of combo keys that have been fully evaluated
+       # @return [Array] Returns [program, mean_score, combo_key, params]
+       sig do
+         params(
+           param_score_dict: T::Hash[String, T::Array[T::Array[T.untyped]]],
+           fully_evaled_param_combos: T::Array[String]
+         ).returns([T.untyped, Float, String, T::Hash[Symbol, T.untyped]])
+       end
+       def self.get_program_with_highest_avg_score(param_score_dict, fully_evaled_param_combos)
+         # Calculate the mean for each combination of categorical parameters, based on past trials
+         results = []
+         param_score_dict.each do |key, values|
+           scores = values.map { |v| v[0] }
+           mean = scores.sum.to_f / scores.size
+           program = values[0][1]
+           params = values[0][2]
+           results << [key, mean, program, params]
+         end
+
+         # Sort results by the mean in descending order
+         sorted_results = results.sort_by { |_key, mean, _program, _params| -mean }
+
+         # Find the combination with the highest mean, skip fully evaluated ones
+         sorted_results.each do |key, mean, program, params|
+           next if fully_evaled_param_combos.include?(key)
+           return [program, mean, key, params]
+         end
+
+         # If no valid program is found, return the last valid one
+         _key, mean, program, params = sorted_results.last
+         [program, mean, _key, params]
+       end
+
+       # Save a candidate program to the log directory
+       # Used during optimization to save intermediate trial results
+       #
+       # @param program [Module] The program to save
+       # @param log_dir [String, nil] The directory to save to (returns nil if nil)
+       # @param trial_num [Integer] The trial number for naming the file
+       # @param note [String, nil] Optional note to append to filename
+       # @return [String, nil] The path where program was saved, or nil if log_dir is nil
+       sig do
+         params(
+           program: T.untyped,
+           log_dir: T.nilable(String),
+           trial_num: Integer,
+           note: T.nilable(String)
+         ).returns(T.nilable(String))
+       end
+       def self.save_candidate_program(program, log_dir, trial_num, note: nil)
+         return nil if log_dir.nil?
+
+         # Ensure the directory exists
+         eval_programs_dir = File.join(log_dir, "evaluated_programs")
+         FileUtils.mkdir_p(eval_programs_dir) unless Dir.exist?(eval_programs_dir)
+
+         # Define the save path for the program
+         filename = if note
+           "program_#{trial_num}_#{note}.json"
+         else
+           "program_#{trial_num}.json"
+         end
+         save_path = File.join(eval_programs_dir, filename)
+
+         # Save the program
+         program.save(save_path)
+
+         save_path
+       end
+
        # Configuration for bootstrap operations
        class BootstrapConfig
          extend T::Sig
@@ -50,6 +212,9 @@ module DSPy
        end
 
        # Result of bootstrap operation
+       # @deprecated This class is deprecated and kept only for backward compatibility.
+       #   The new create_n_fewshot_demo_sets returns a Hash{predictor_idx => [[demos]]}
+       #   instead of this BootstrapResult object. Use the dict interface directly.
        class BootstrapResult
          extend T::Sig
 
@@ -93,58 +258,200 @@ module DSPy
          end
        end
 
-       # Create multiple candidate sets of few-shot examples through bootstrapping
+       # Create multiple candidate sets of few-shot demonstrations using different bootstrap strategies
+       #
+       # This is the Python-compatible implementation that uses a seed-based loop to create
+       # demo sets using 4 strategies: ZeroShot (-3), LabeledOnly (-2), Unshuffled (-1), and Shuffled (>=0)
+       #
+       # @param student [DSPy::Module] The student program to bootstrap
+       # @param num_candidate_sets [Integer] Number of demo sets to create (accounts for special seeds)
+       # @param trainset [Array<DSPy::Example>] Training examples
+       # @param max_bootstrapped_demos [Integer] Maximum bootstrapped demonstrations per set
+       # @param max_labeled_demos [Integer] Maximum labeled demonstrations to prepend
+       # @param min_num_samples [Integer] Minimum number of samples for shuffled strategy
+       # @param metric [Proc] Optional metric to validate bootstrapped examples
+       # @param teacher_settings [Hash] Settings for teacher program (future use)
+       # @param seed [Integer] Random seed for reproducibility
+       # @param include_non_bootstrapped [Boolean] Include ZeroShot and LabeledOnly strategies
+       # @param labeled_sample [Boolean] Whether to sample labeled examples randomly
+       # @return [Hash{Integer => Array<Array<DSPy::FewShotExample>>}] Map of predictor index to demo sets
        sig do
          params(
-           program: T.untyped,
+           student: T.untyped,
+           num_candidate_sets: Integer,
            trainset: T::Array[T.untyped],
-           config: BootstrapConfig,
-           metric: T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T::Boolean))
-         ).returns(BootstrapResult)
+           max_bootstrapped_demos: Integer,
+           max_labeled_demos: Integer,
+           min_num_samples: Integer,
+           metric: T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T::Boolean)),
+           teacher_settings: T::Hash[Symbol, T.untyped],
+           seed: T.nilable(Integer),
+           include_non_bootstrapped: T::Boolean,
+           labeled_sample: T::Boolean
+         ).returns(T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]])
        end
-       def self.create_n_fewshot_demo_sets(program, trainset, config: BootstrapConfig.new, metric: nil)
-         DSPy::Context.with_span(
-           operation: 'optimization.bootstrap_start',
-           'dspy.module' => 'Bootstrap',
-           'bootstrap.trainset_size' => trainset.size,
-           'bootstrap.max_examples' => config.max_bootstrapped_examples,
-           'bootstrap.num_candidate_sets' => config.num_candidate_sets
-         ) do
-           # Convert to typed examples if needed
-           typed_examples = ensure_typed_examples(trainset)
-
-           # Generate successful examples through bootstrap
-           successful_examples, failed_examples = generate_successful_examples(
-             program,
-             typed_examples,
-             config,
-             metric
-           )
+       def self.create_n_fewshot_demo_sets(
+         student,
+         num_candidate_sets,
+         trainset,
+         max_bootstrapped_demos: 3,
+         max_labeled_demos: 3,
+         min_num_samples: 1,
+         metric: nil,
+         teacher_settings: {},
+         seed: nil,
+         include_non_bootstrapped: true,
+         labeled_sample: true
+       )
+         demo_candidates = Hash.new { |h, k| h[k] = [] }
+         rng = seed ? Random.new(seed) : Random.new
+
+         # Determine number of predictors exposed by the student module
+         num_predictors = if student.respond_to?(:predictors)
+           predictors = Array(student.predictors)
+           predictors.empty? ? 1 : predictors.size
+         else
+           1
+         end
 
-           # Create candidate sets from successful examples
-           candidate_sets = create_candidate_sets(successful_examples, config)
-
-           # Gather statistics
-           statistics = {
-             total_trainset: trainset.size,
-             successful_count: successful_examples.size,
-             failed_count: failed_examples.size,
-             success_rate: successful_examples.size.to_f / (successful_examples.size + failed_examples.size),
-             candidate_sets_created: candidate_sets.size,
-             average_set_size: candidate_sets.empty? ? 0 : candidate_sets.map(&:size).sum.to_f / candidate_sets.size
-           }
+         # Adjust for 3 special seeds (-3, -2, -1)
+         adjusted_num_sets = num_candidate_sets - 3
+
+         # Loop from -3 to adjusted_num_sets (exclusive)
+         (-3...adjusted_num_sets).each do |current_seed|
+           case current_seed
+           when -3 # ZeroShot strategy
+             next unless include_non_bootstrapped
+             # Empty demo sets for all predictors
+             num_predictors.times { |idx| demo_candidates[idx] << [] }
+
+           when -2 # LabeledOnly strategy
+             next unless include_non_bootstrapped && max_labeled_demos > 0
+             # Sample or take labeled examples
+             labeled_demos = create_labeled_demos(trainset, max_labeled_demos, labeled_sample, rng)
+             num_predictors.times { |idx| demo_candidates[idx] << labeled_demos }
+
+           when -1 # Unshuffled strategy
+             # Bootstrap without shuffle
+             bootstrapped_demos = create_bootstrapped_demos(
+               student, trainset, max_bootstrapped_demos, max_labeled_demos, metric
+             )
+             num_predictors.times { |idx| demo_candidates[idx] << bootstrapped_demos }
+
+           else # Shuffled strategies (seed >= 0)
+             # Shuffle trainset with current seed
+             seed_rng = Random.new(current_seed)
+             shuffled_trainset = trainset.shuffle(random: seed_rng)
+
+             # Random demo count between min and max
+             num_demos = seed_rng.rand(min_num_samples..max_bootstrapped_demos)
+
+             # Bootstrap with shuffled data
+             bootstrapped_demos = create_bootstrapped_demos(
+               student, shuffled_trainset, num_demos, max_labeled_demos, metric
+             )
+             num_predictors.times { |idx| demo_candidates[idx] << bootstrapped_demos }
+           end
+         end
 
-           emit_bootstrap_complete_event(statistics)
+         demo_candidates
+       end
 
-           BootstrapResult.new(
-             candidate_sets: candidate_sets,
-             successful_examples: successful_examples,
-             failed_examples: failed_examples,
-             statistics: statistics
+       # Create labeled demonstrations from trainset examples
+       sig do
+         params(
+           trainset: T::Array[T.untyped],
+           max_labeled: Integer,
+           labeled_sample: T::Boolean,
+           rng: Random
+         ).returns(T::Array[DSPy::FewShotExample])
+       end
+       def self.create_labeled_demos(trainset, max_labeled, labeled_sample, rng)
+         examples = if labeled_sample
+           trainset.sample([max_labeled, trainset.size].min, random: rng)
+         else
+           trainset.take(max_labeled)
+         end
+
+         examples.map do |ex|
+           DSPy::FewShotExample.new(
+             input: ex.input_values,
+             output: ex.expected_values
            )
         end
       end
 
+       # Create bootstrapped demonstrations by executing student on trainset
+       sig do
+         params(
+           student: T.untyped,
+           trainset: T::Array[T.untyped],
+           max_bootstrapped: Integer,
+           max_labeled: Integer,
+           metric: T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T::Boolean))
+         ).returns(T::Array[DSPy::FewShotExample])
+       end
+       def self.create_bootstrapped_demos(student, trainset, max_bootstrapped, max_labeled, metric)
+         successful_demos = []
+
+         # Execute student on trainset to bootstrap demonstrations
+         trainset.each do |example|
+           break if successful_demos.size >= max_bootstrapped
+
+           begin
+             # Call student with input
+             prediction = student.call(**example.input_values)
+             prediction_hash = prediction.respond_to?(:to_h) ? prediction.to_h : prediction
+
+             # Check if prediction matches expected output
+             success = if metric
+               metric.call(example, prediction_hash)
+             else
+               example.matches_prediction?(prediction_hash)
+             end
+
+             if success
+               # Extract only output fields from prediction
+               output_fields = extract_output_fields_for_demo(prediction_hash, example.signature_class)
+
+               demo = DSPy::FewShotExample.new(
+                 input: example.input_values,
+                 output: output_fields
+               )
+               successful_demos << demo
+             end
+           rescue => e
+             # Continue on errors
+             DSPy.logger.warn("Bootstrap error: #{e.message}") if DSPy.logger
+           end
+         end
+
+         # Prepend labeled examples if requested
+         if max_labeled > 0
+           labeled = trainset.take(max_labeled).map do |ex|
+             DSPy::FewShotExample.new(
+               input: ex.input_values,
+               output: ex.expected_values
+             )
+           end
+           successful_demos = labeled + successful_demos
+         end
+
+         successful_demos
+       end
+
+       # Extract only output fields from prediction hash
+       sig do
+         params(
+           prediction_hash: T::Hash[Symbol, T.untyped],
+           signature_class: T.class_of(DSPy::Signature)
+         ).returns(T::Hash[Symbol, T.untyped])
+       end
+       def self.extract_output_fields_for_demo(prediction_hash, signature_class)
+         output_field_names = signature_class.output_field_descriptors.keys
+         prediction_hash.slice(*output_field_names)
+       end
+
        # Evaluate a candidate program on examples with proper error handling
        sig do
          params(
@@ -404,4 +711,4 @@ module DSPy
        end
      end
    end
- end
+ end
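
Usage sketch for the new bootstrap helpers above (not part of the package; the DSPy::Teleprompt::Utils constant, the student_program/trainset/predictor objects, and the metric lambda are assumptions for illustration):

    # Hypothetical wiring of the Python-compatible helpers added in this release.
    utils = DSPy::Teleprompt::Utils   # assumed namespace for teleprompt/utils.rb

    # Hash of predictor index => list of demo sets, built from the four strategies
    # (ZeroShot, LabeledOnly, Unshuffled, Shuffled); 3 of the 10 sets come from the
    # special seeds -3, -2 and -1.
    demo_candidates = utils.create_n_fewshot_demo_sets(
      student_program,   # assumed: a DSPy module responding to #predictors and #call
      10,                # num_candidate_sets
      trainset,          # assumed: Array of DSPy::Example
      max_bootstrapped_demos: 3,
      max_labeled_demos: 3,
      metric: ->(example, prediction) { example.expected_values == prediction },
      seed: 42
    )

    # Reproducible minibatch sampling for MIPROv2-style trials.
    batch = utils.create_minibatch(trainset, 25, Random.new(42))

    # Python-compatible instruction rewriting on a predictor.
    sig_wrapper = utils.get_signature(predictor)   # predictor is assumed
    utils.set_signature(predictor, sig_wrapper.with_instructions("Think step by step."))
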
data/lib/dspy/version.rb CHANGED
@@ -1,5 +1,5 @@
  # frozen_string_literal: true
 
  module DSPy
-   VERSION = "0.28.1"
- end
+   VERSION = "0.29.0"
+ end
data/lib/dspy.rb CHANGED
@@ -12,6 +12,7 @@ require_relative 'dspy/observability/observation_type'
  require_relative 'dspy/context'
  require_relative 'dspy/events'
  require_relative 'dspy/events/types'
+ require_relative 'dspy/reflection_lm'
 
  module DSPy
    extend Dry::Configurable
@@ -191,12 +192,14 @@ module DSPy
    end
  end
 
+ require_relative 'dspy/callbacks'
  require_relative 'dspy/module'
  require_relative 'dspy/field'
  require_relative 'dspy/signature'
  require_relative 'dspy/few_shot_example'
  require_relative 'dspy/prompt'
  require_relative 'dspy/example'
+ require_relative 'dspy/datasets'
  require_relative 'dspy/lm'
  require_relative 'dspy/image'
  require_relative 'dspy/prediction'
@@ -210,10 +213,9 @@ require_relative 'dspy/evaluate'
  require_relative 'dspy/teleprompt/teleprompter'
  require_relative 'dspy/teleprompt/utils'
  require_relative 'dspy/teleprompt/data_handler'
+ require_relative 'dspy/teleprompt/gepa'
  require_relative 'dspy/propose/grounded_proposer'
- require_relative 'dspy/teleprompt/simple_optimizer'
  require_relative 'dspy/teleprompt/mipro_v2'
- require_relative 'dspy/teleprompt/gepa'
  require_relative 'dspy/tools'
  require_relative 'dspy/memory'
  require_relative 'dspy/storage/program_storage'
data/lib/gepa/api.rb ADDED
@@ -0,0 +1,61 @@
+ # frozen_string_literal: true
+
+ require 'sorbet-runtime'
+
+ require_relative 'core/engine'
+ require_relative 'core/result'
+
+ module GEPA
+   extend T::Sig
+   module_function
+
+   sig do
+     params(
+       seed_candidate: T::Hash[String, String],
+       trainset: T::Array[T.untyped],
+       valset: T::Array[T.untyped],
+       adapter: T.untyped,
+       reflective_proposer: T.untyped,
+       merge_proposer: T.nilable(T.untyped),
+       logger: T.untyped,
+       experiment_tracker: T.untyped,
+       max_metric_calls: Integer,
+       telemetry: T.nilable(T.untyped)
+     ).returns(GEPA::Core::Result)
+   end
+   def optimize(
+     seed_candidate:,
+     trainset:,
+     valset:,
+     adapter:,
+     reflective_proposer:,
+     merge_proposer: nil,
+     logger:,
+     experiment_tracker:,
+     max_metric_calls:,
+     telemetry: nil
+   )
+     evaluator = proc { |dataset, candidate| adapter.evaluate(dataset, candidate) }
+
+     engine = GEPA::Core::Engine.new(
+       run_dir: nil,
+       evaluator: evaluator,
+       valset: valset,
+       seed_candidate: seed_candidate,
+       max_metric_calls: max_metric_calls,
+       perfect_score: Float::INFINITY,
+       seed: 0,
+       reflective_proposer: reflective_proposer,
+       merge_proposer: merge_proposer,
+       logger: logger,
+       experiment_tracker: experiment_tracker,
+       telemetry: telemetry || GEPA::Telemetry,
+       track_best_outputs: false,
+       display_progress_bar: false,
+       raise_on_exception: true
+     )
+
+     state = engine.run
+     GEPA::Core::Result.from_state(state)
+   end
+ end
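
Usage sketch for the new GEPA entry point above (not part of the package; the adapter, proposer, logger, and tracker objects are stand-ins for anything satisfying the duck-typed calls visible in the diff, such as adapter.evaluate(dataset, candidate)):

    # Hypothetical call into the extracted GEPA engine.
    result = GEPA.optimize(
      seed_candidate: { 'answer_question' => 'Answer the question concisely.' },
      trainset: trainset,
      valset: valset,
      adapter: my_adapter,                         # assumed: responds to #evaluate(dataset, candidate)
      reflective_proposer: my_reflective_proposer, # assumed: e.g. built from gepa/proposer/reflective_mutation
      logger: my_logger,                           # assumed: see gepa/logging/logger.rb
      experiment_tracker: my_tracker,              # assumed: see gepa/logging/experiment_tracker.rb
      max_metric_calls: 200
    )
    # result is a GEPA::Core::Result built from the final engine state.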