RubyGems - dspy - Versions diffs - 0.28.1 → 0.29.0 - Mend

dspy 0.28.1 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

checksums.yaml +4 -4
data/README.md +2 -3
data/lib/dspy/callbacks.rb +222 -0
data/lib/dspy/chain_of_thought.rb +2 -1
data/lib/dspy/code_act.rb +14 -1
data/lib/dspy/datasets/ade.rb +90 -0
data/lib/dspy/datasets.rb +8 -0
data/lib/dspy/lm.rb +9 -12
data/lib/dspy/mixins/struct_builder.rb +17 -25
data/lib/dspy/module.rb +45 -1
data/lib/dspy/observability/async_span_processor.rb +67 -93
data/lib/dspy/observability.rb +43 -1
data/lib/dspy/predict.rb +17 -0
data/lib/dspy/prompt.rb +90 -20
data/lib/dspy/propose/dataset_summary_generator.rb +210 -0
data/lib/dspy/propose/grounded_proposer.rb +320 -66
data/lib/dspy/re_act.rb +13 -0
data/lib/dspy/reflection_lm.rb +36 -0
data/lib/dspy/teleprompt/bootstrap_strategy.rb +26 -0
data/lib/dspy/teleprompt/gepa.rb +448 -2803
data/lib/dspy/teleprompt/mipro_v2.rb +624 -100
data/lib/dspy/teleprompt/utils.rb +349 -42
data/lib/dspy/version.rb +2 -2
data/lib/dspy.rb +4 -2
data/lib/gepa/api.rb +61 -0
data/lib/gepa/core/engine.rb +226 -0
data/lib/gepa/core/evaluation_batch.rb +26 -0
data/lib/gepa/core/result.rb +92 -0
data/lib/gepa/core/state.rb +231 -0
data/lib/gepa/logging/experiment_tracker.rb +54 -0
data/lib/gepa/logging/logger.rb +57 -0
data/lib/gepa/logging.rb +9 -0
data/lib/gepa/proposer/base.rb +27 -0
data/lib/gepa/proposer/merge_proposer.rb +424 -0
data/lib/gepa/proposer/reflective_mutation/base.rb +48 -0
data/lib/gepa/proposer/reflective_mutation/reflective_mutation.rb +188 -0
data/lib/gepa/strategies/batch_sampler.rb +91 -0
data/lib/gepa/strategies/candidate_selector.rb +97 -0
data/lib/gepa/strategies/component_selector.rb +57 -0
data/lib/gepa/strategies/instruction_proposal.rb +120 -0
data/lib/gepa/telemetry.rb +122 -0
data/lib/gepa/utils/pareto.rb +119 -0
data/lib/gepa.rb +21 -0
metadata +59 -4
data/lib/dspy/teleprompt/simple_optimizer.rb +0 -497

data/lib/dspy/propose/grounded_proposer.rb CHANGED Viewed

@@ -11,40 +11,80 @@ module DSPy
     class GroundedProposer
       extend T::Sig
-      # Configuration for instruction proposal
+      MAX_HISTORY_INSTRUCTIONS = 5 # passed as top_n when summarizing previous instructions for the generation context
+      # Tip texts keyed by name (described upstream as Python-compatible); "none" maps to an empty tip.
+      TIPS = {
+        "none" => "",
+        "creative" => "Don't be afraid to be creative when creating the new instruction!",
+        "simple" => "Keep the instruction clear and concise.",
+        "description" => "Make sure your instruction is very informative and descriptive.",
+        "high_stakes" => "The instruction should include a high stakes scenario in which the LM must solve the task!",
+        "persona" => 'Include a persona that is relevant to the task in the instruction (ie. "You are a ...")'
+      }.freeze
+      # Configuration for instruction proposal (Python-compatible)
       class Config
         extend T::Sig
+        # Core parameters
         sig { returns(Integer) }
         attr_accessor :num_instruction_candidates
+        # Python-compatible awareness flags (match Python defaults exactly)
+        sig { returns(T::Boolean) }
+        attr_accessor :program_aware
+        sig { returns(T::Boolean) }
+        attr_accessor :use_dataset_summary
+        sig { returns(T::Boolean) }
+        attr_accessor :use_task_demos
+        sig { returns(T::Boolean) }
+        attr_accessor :use_tip
+        sig { returns(T::Boolean) }
+        attr_accessor :use_instruct_history
+        # Additional parameters
         sig { returns(Integer) }
-        attr_accessor :max_examples_for_analysis
+        attr_accessor :view_data_batch_size
         sig { returns(Integer) }
-        attr_accessor :max_instruction_length
+        attr_accessor :num_demos_in_context
         sig { returns(T::Boolean) }
-        attr_accessor :use_task_description
+        attr_accessor :set_tip_randomly
         sig { returns(T::Boolean) }
-        attr_accessor :use_input_output_analysis
+        attr_accessor :set_history_randomly
-        sig { returns(T::Boolean) }
-        attr_accessor :use_few_shot_examples
+        sig { returns(Float) }
+        attr_accessor :init_temperature
-        sig { returns(String) }
-        attr_accessor :proposal_model
+        sig { returns(T::Boolean) }
+        attr_accessor :verbose
         sig { void }
         def initialize
+          # Core parameters
           @num_instruction_candidates = 5
-          @max_examples_for_analysis = 10
-          @max_instruction_length = 200
-          @use_task_description = true
-          @use_input_output_analysis = true
-          @use_few_shot_examples = true
-          @proposal_model = "gpt-4o-mini"
+          # Python-compatible awareness flags (match Python defaults)
+          @program_aware = true
+          @use_dataset_summary = true
+          @use_task_demos = true
+          @use_tip = true
+          @use_instruct_history = true
+          # Additional parameters
+          @view_data_batch_size = 10
+          @num_demos_in_context = 3
+          @set_tip_randomly = true
+          @set_history_randomly = true
+          @init_temperature = 1.0
+          @verbose = false
         end
       end
@@ -55,6 +95,9 @@ module DSPy
         sig { returns(T::Array[String]) }
         attr_reader :candidate_instructions
+        sig { returns(T::Hash[Integer, T::Array[String]]) }
+        attr_reader :predictor_instructions
         sig { returns(T::Hash[Symbol, T.untyped]) }
         attr_reader :analysis
@@ -65,11 +108,16 @@ module DSPy
           params(
             candidate_instructions: T::Array[String],
             analysis: T::Hash[Symbol, T.untyped],
-            metadata: T::Hash[Symbol, T.untyped]
+            metadata: T::Hash[Symbol, T.untyped],
+            predictor_instructions: T.nilable(T::Hash[Integer, T::Array[String]])
           ).void
         end
-        def initialize(candidate_instructions:, analysis:, metadata:)
+        # Stores the proposal data as frozen values.
+        #
+        # candidate_instructions - proposed instruction strings (frozen in place).
+        # analysis / metadata    - hashes describing the proposal (frozen in place).
+        # predictor_instructions - optional map of predictor index to its list of
+        #                          instructions; each list is copied and frozen,
+        #                          and nil is treated as an empty map.
+        def initialize(candidate_instructions:, analysis:, metadata:, predictor_instructions: nil)
           @candidate_instructions = candidate_instructions.freeze
+          per_predictor = (predictor_instructions || {}).transform_values { |list| list.dup.freeze }
+          @predictor_instructions = per_predictor.freeze
           @analysis = analysis.freeze
           @metadata = metadata.freeze
         end
@@ -88,21 +136,77 @@ module DSPy
       sig { returns(Config) }
       attr_reader :config
-      sig { params(config: T.nilable(Config)).void }
-      def initialize(config: nil)
+      sig do
+        params(
+          config: T.nilable(Config),
+          program: T.nilable(T.untyped),
+          trainset: T.nilable(T::Array[DSPy::Example])
+        ).void
+      end
+      def initialize(config: nil, program: nil, trainset: nil)
         @config = config || Config.new
+        @program = program
+        @trainset = trainset
+        @dataset_summary = nil
+        @program_code_string = nil
+        # Generate dataset summary if data-aware mode enabled (Python: use_dataset_summary)
+        if @config.use_dataset_summary && trainset && !trainset.empty?
+          begin
+            require_relative 'dataset_summary_generator'
+            @dataset_summary = DatasetSummaryGenerator.create_dataset_summary(
+              trainset,
+              @config.view_data_batch_size,
+              DSPy.current_lm,
+              verbose: @config.verbose
+            )
+          rescue => e
+            DSPy.logger.warn("Failed to generate dataset summary: #{e.message}")
+            @dataset_summary = nil
+          end
+        end
+        # Extract program source code if program-aware mode enabled
+        if @config.program_aware && program
+          @program_code_string = extract_program_source(program)
+        end
+      end
+      private
+      # Extract source code from program for program-aware mode
+      sig { params(program: T.untyped).returns(T.nilable(String)) }
+      def extract_program_source(program)
+        # Get the program's class
+        klass = program.is_a?(Class) ? program : program.class
+        # Try to get source location
+        source_location = klass.instance_method(:forward).source_location rescue nil
+        return nil unless source_location
+        file, line = source_location
+        # Read the source file and extract the class definition
+        # This is a simplified version - could be enhanced with method_source gem
+        code = "Program: #{klass.name}\nSource: #{file}:#{line}"
+        code
+      rescue => e
+        DSPy.logger.warn("Could not extract program source: #{e.message}")
+        nil
       end
+      public
       # Generate instruction candidates for a signature and training examples
       sig do
         params(
           signature_class: T.class_of(DSPy::Signature),
           examples: T::Array[T.untyped],
           few_shot_examples: T.nilable(T::Array[T.untyped]),
-          current_instruction: T.nilable(String)
+          current_instruction: T.nilable(String),
+          trial_logs: T.nilable(T::Hash[Integer, T::Hash[Symbol, T.untyped]])
         ).returns(ProposalResult)
       end
-      def propose_instructions(signature_class, examples, few_shot_examples: nil, current_instruction: nil)
+      def propose_instructions(signature_class, examples, few_shot_examples: nil, current_instruction: nil, trial_logs: nil)
         DSPy::Context.with_span(
           operation: 'optimization.instruction_proposal',
           'dspy.module' => 'GroundedProposer',
@@ -116,9 +220,11 @@ module DSPy
           # Generate instruction candidates
           candidates = generate_instruction_candidates(
-            signature_class,
-            analysis,
-            current_instruction
+            signature_class,
+            analysis,
+            current_instruction,
+            few_shot_examples: few_shot_examples,
+            trial_logs: trial_logs
           )
           # Filter and rank candidates
@@ -126,8 +232,8 @@ module DSPy
           metadata = {
             generation_timestamp: Time.now.iso8601,
-            model_used: @config.proposal_model,
-            num_examples_analyzed: [examples.size, @config.max_examples_for_analysis].min,
+            model_used: DSPy.current_lm.model,
+            num_examples_analyzed: [examples.size, @config.view_data_batch_size].min,
             original_instruction: current_instruction
           }
@@ -142,6 +248,50 @@ module DSPy
         end
       end
+      sig do
+        params(
+          trainset: T::Array[T.untyped],
+          program: T.untyped,
+          demo_candidates: T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]],
+          trial_logs: T.nilable(T::Hash[Integer, T::Hash[Symbol, T.untyped]]),
+          num_instruction_candidates: T.nilable(Integer)
+        ).returns(ProposalResult)
+      end
+      def propose_instructions_for_program(trainset:, program:, demo_candidates:, trial_logs: nil, num_instruction_candidates: nil)
+        num_candidates = num_instruction_candidates || @config.num_instruction_candidates
+        current_instruction = if program.respond_to?(:prompt) && program.prompt.respond_to?(:instruction)
+          program.prompt.instruction
+        else
+          nil
+        end
+        few_shot_examples = demo_candidates[0]&.flatten&.take(@config.num_demos_in_context) || []
+        signature_class = if program.respond_to?(:signature_class)
+          program.signature_class
+        else
+          raise ArgumentError, "Program must expose signature_class for instruction proposal"
+        end
+        base_result = propose_instructions(
+          signature_class,
+          trainset,
+          few_shot_examples: few_shot_examples,
+          current_instruction: current_instruction,
+          trial_logs: trial_logs
+        )
+        predictor_instructions = { 0 => base_result.candidate_instructions.take(num_candidates) }
+        ProposalResult.new(
+          candidate_instructions: base_result.candidate_instructions,
+          analysis: base_result.analysis,
+          metadata: base_result.metadata,
+          predictor_instructions: predictor_instructions
+        )
+      end
       private
       # Analyze the task based on signature and training examples
@@ -204,7 +354,7 @@ module DSPy
       # Analyze patterns in training examples
       sig { params(examples: T::Array[T.untyped]).returns(T::Hash[Symbol, T.untyped]) }
       def analyze_example_patterns(examples)
-        analysis_examples = examples.take(@config.max_examples_for_analysis)
+        analysis_examples = examples.take(@config.view_data_batch_size)
         {
           total_examples: examples.size,
@@ -323,12 +473,20 @@ module DSPy
         params(
           signature_class: T.class_of(DSPy::Signature),
           analysis: T::Hash[Symbol, T.untyped],
-          current_instruction: T.nilable(String)
+          current_instruction: T.nilable(String),
+          few_shot_examples: T.nilable(T::Array[T.untyped]),
+          trial_logs: T.nilable(T::Hash[Integer, T::Hash[Symbol, T.untyped]])
         ).returns(T::Array[String])
       end
-      def generate_instruction_candidates(signature_class, analysis, current_instruction)
+      def generate_instruction_candidates(signature_class, analysis, current_instruction, few_shot_examples: nil, trial_logs: nil)
         # Build context for instruction generation
-        context = build_generation_context(signature_class, analysis, current_instruction)
+        context = build_generation_context(
+          signature_class,
+          analysis,
+          current_instruction,
+          few_shot_examples: few_shot_examples,
+          trial_logs: trial_logs
+        )
         # Create instruction generation signature
         instruction_signature = create_instruction_generation_signature
@@ -346,16 +504,7 @@ module DSPy
             )
             instruction = result.instruction.strip
-            # Truncate if too long
-            if instruction.length > @config.max_instruction_length
-              instruction = instruction[0, @config.max_instruction_length].strip
-              # Try to end at a word boundary
-              if instruction.include?(' ')
-                instruction = instruction.rpartition(' ').first + '.'
-              end
-            end
             candidates << instruction if instruction.length > 0
           rescue => error
             DSPy.logger.warn("Failed to generate instruction candidate #{i + 1}: #{error.message}")
@@ -375,32 +524,64 @@ module DSPy
         params(
           signature_class: T.class_of(DSPy::Signature),
           analysis: T::Hash[Symbol, T.untyped],
-          current_instruction: T.nilable(String)
+          current_instruction: T.nilable(String),
+          few_shot_examples: T.nilable(T::Array[T.untyped]),
+          trial_logs: T.nilable(T::Hash[Integer, T::Hash[Symbol, T.untyped]])
         ).returns(String)
       end
-      def build_generation_context(signature_class, analysis, current_instruction)
+      def build_generation_context(signature_class, analysis, current_instruction, few_shot_examples: nil, trial_logs: nil)
         context_parts = []
-        context_parts << "Task: #{signature_class.description}" if @config.use_task_description
-        if @config.use_input_output_analysis
-          # Build detailed field descriptions including enum values
-          input_descriptions = analysis[:input_fields].map { |f| format_field_description(f) }
-          output_descriptions = analysis[:output_fields].map { |f| format_field_description(f) }
-          context_parts << "Input fields: #{input_descriptions.join(', ')}"
-          context_parts << "Output fields: #{output_descriptions.join(', ')}"
+        # Include dataset summary if enabled and available
+        if @config.use_dataset_summary && @dataset_summary
+          context_parts << "Dataset Summary: #{@dataset_summary}"
         end
+        # Include program code if enabled and available
+        if @config.program_aware && @program_code_string
+          context_parts << "Program Code:\n#{@program_code_string}"
+        end
+        # Always include task description (fundamental to understanding the task)
+        context_parts << "Task: #{signature_class.description}"
+        # Always include field analysis (fundamental to understanding inputs/outputs)
+        input_descriptions = analysis[:input_fields].map { |f| format_field_description(f) }
+        output_descriptions = analysis[:output_fields].map { |f| format_field_description(f) }
+        context_parts << "Input fields: #{input_descriptions.join(', ')}"
+        context_parts << "Output fields: #{output_descriptions.join(', ')}"
+        # Include task demos if enabled and available
+        if @config.use_task_demos && few_shot_examples && !few_shot_examples.empty?
+          demo_strings = few_shot_examples.take(@config.num_demos_in_context).map do |example|
+            format_example_as_demo(example)
+          end
+          context_parts << "Task Demos:\n#{demo_strings.join("\n\n")}"
+        end
         if analysis[:common_themes] && analysis[:common_themes].any?
           context_parts << "Task themes: #{analysis[:common_themes].join(', ')}"
         end
         if current_instruction
           context_parts << "Current instruction: \"#{current_instruction}\""
         end
-        context_parts.join("\n")
+        # Include tip if enabled
+        if @config.use_tip
+          tip = select_tip
+          context_parts << "Tip: #{tip}" if tip && !tip.empty?
+        end
+        if @config.use_instruct_history
+          history_summary = build_instruction_history_summary(trial_logs, predictor_index: 0, top_n: MAX_HISTORY_INSTRUCTIONS)
+          unless history_summary.empty?
+            context_parts << "Previous instructions:\n#{history_summary}"
+          end
+        end
+        context_parts.join("\n\n")
       end
       # Format field description with enum values if applicable
@@ -414,6 +595,83 @@ module DSPy
         end
       end
+      # Format an example as a demo for context
+      sig { params(example: T.untyped).returns(String) }
+      def format_example_as_demo(example)
+        return example.to_s unless example.respond_to?(:inputs) && example.respond_to?(:expected)
+        parts = []
+        # Format inputs
+        if example.inputs && !example.inputs.empty?
+          input_strs = example.inputs.map { |k, v| "#{k}: #{v.inspect}" }
+          parts << "Inputs: #{input_strs.join(', ')}"
+        end
+        # Format expected outputs
+        if example.expected && !example.expected.empty?
+          output_strs = example.expected.map { |k, v| "#{k}: #{v.inspect}" }
+          parts << "Expected: #{output_strs.join(', ')}"
+        end
+        parts.join(" | ")
+      end
+      # Select a tip based on configuration
+      sig { returns(T.nilable(String)) }
+      def select_tip
+        if @config.set_tip_randomly
+          # Randomly select a tip (excluding "none")
+          tip_keys = TIPS.keys.reject { |k| k == "none" }
+          selected_key = tip_keys.sample
+          TIPS[selected_key]
+        else
+          # Return empty string when not using random tips
+          ""
+        end
+      end
+      sig do
+        params(
+          trial_logs: T.nilable(T::Hash[Integer, T::Hash[Symbol, T.untyped]]),
+          predictor_index: Integer,
+          top_n: Integer
+        ).returns(String)
+      end
+      def build_instruction_history_summary(trial_logs, predictor_index:, top_n:)
+        return "" unless @config.use_instruct_history
+        logs = trial_logs || {}
+        aggregate = Hash.new { |hash, key| hash[key] = { total: 0.0, count: 0 } }
+        logs.each_value do |entry|
+          score = entry[:score]
+          next unless score.respond_to?(:to_f)
+          instructions = entry[:instructions]
+          instruction = nil
+          if instructions.respond_to?(:[])
+            instruction = instructions[predictor_index] || instructions[:default]
+          end
+          instruction ||= entry[:instruction]
+          next unless instruction.is_a?(String) && !instruction.empty?
+          aggregate[instruction][:total] += score.to_f
+          aggregate[instruction][:count] += 1
+        end
+        return "" if aggregate.empty?
+        ranked = aggregate.map do |instruction, stats|
+          average = stats[:total] / stats[:count]
+          [instruction, average]
+        end
+        top_entries = ranked.sort_by { |(_, avg)| -avg }.take(top_n).reverse
+        top_entries.map { |instruction, avg| format("%s | Score: %.4f", instruction, avg) }.join("\n\n")
+      end
       # Build requirements text for instruction generation
       sig { params(analysis: T::Hash[Symbol, T.untyped]).returns(String) }
       def build_requirements_text(analysis)
@@ -478,25 +736,21 @@ module DSPy
         # Filter out duplicates and empty candidates
         filtered = candidates.uniq.reject(&:empty?)
-        # Simple ranking based on length and content quality
+        # Simple ranking based on content quality (Python-compatible: no length scoring)
         filtered.sort_by do |instruction|
           score = 0
-          # Prefer moderate length instructions
-          length_score = [instruction.length, @config.max_instruction_length].min / @config.max_instruction_length.to_f
-          score += length_score * 0.3
           # Prefer instructions with action words
           action_words = %w[analyze classify generate explain solve determine identify]
           action_score = action_words.count { |word| instruction.downcase.include?(word) }
           score += action_score * 0.4
           # Prefer instructions that mention reasoning for complex tasks
           if analysis[:complexity_indicators][:requires_reasoning]
             reasoning_score = instruction.downcase.match?(/\b(step|think|reason|explain)\b/) ? 1 : 0
             score += reasoning_score * 0.3
           end
           -score # Negative for descending sort
         end
       end
@@ -588,9 +842,9 @@ module DSPy
           'proposal.num_candidates' => result.num_candidates,
           'proposal.best_instruction_length' => result.best_instruction.length,
           'proposal.analysis_themes' => result.analysis[:common_themes] || [],
-          'proposal.model_used' => @config.proposal_model
+          'proposal.model_used' => DSPy.current_lm.model
         })
       end
     end
   end
-end
+end

data/lib/dspy/re_act.rb CHANGED Viewed

@@ -144,6 +144,19 @@ module DSPy
       super(enhanced_signature)
     end
+    sig { override.returns(T::Array[[String, DSPy::Module]]) }
+    def named_predictors
+      pairs = T.let([], T::Array[[String, DSPy::Module]])
+      pairs << ["thought_generator", @thought_generator]
+      pairs << ["observation_processor", @observation_processor]
+      pairs
+    end
+    sig { override.returns(T::Array[DSPy::Module]) }
+    def predictors
+      named_predictors.map { |(_, predictor)| predictor }
+    end
     sig { params(kwargs: T.untyped).returns(T.untyped).override }
     def forward(**kwargs)
       # Validate input

data/lib/dspy/reflection_lm.rb ADDED Viewed

@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+require 'sorbet-runtime'
+module DSPy
+  # Lightweight wrapper for running reflection prompts without structured outputs.
+  class ReflectionLM
+    extend T::Sig
+    sig do
+      params(
+        model_id: String,
+        api_key: T.nilable(String),
+        options: T.untyped
+      ).void
+    end
+    def initialize(model_id, api_key: nil, **options)
+      opts = options.each_with_object({}) do |(key, value), memo|
+        memo[key.to_sym] = value
+      end
+      opts[:api_key] = api_key if api_key
+      @lm = DSPy::LM.new(model_id, structured_outputs: false, schema_format: :json, **opts)
+    end
+    sig { params(prompt: String).returns(String) }
+    def call(prompt)
+      response = @lm.raw_chat([{ role: 'user', content: prompt }])
+      response.respond_to?(:content) ? response.content : response.to_s
+    end
+    sig { params(messages: T.nilable(T::Array[T::Hash[Symbol, String]]), block: T.nilable(T.proc.params(arg0: T.untyped).void)).returns(T.untyped) }
+    def raw_chat(messages = nil, &block)
+      @lm.raw_chat(messages, &block)
+    end
+  end
+end

data/lib/dspy/teleprompt/bootstrap_strategy.rb ADDED Viewed

@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+require 'sorbet-runtime'
+module DSPy
+  module Teleprompt
+    # Enumerates the bootstrap strategies accepted by create_n_fewshot_demo_sets.
+    # Each value is a named, type-safe replacement for one of Python's magic seed numbers.
+    class BootstrapStrategy < T::Enum
+      enums do
+        # Zero-shot: use no demonstrations at all (Python seed = -3)
+        ZeroShot = new
+        # Use labeled examples only, without generating bootstrapped demos (Python seed = -2)
+        LabeledOnly = new
+        # Use bootstrapped demonstrations in their original order, no shuffling (Python seed = -1)
+        Unshuffled = new
+        # Use bootstrapped demonstrations, shuffled and with a random size (Python seed >= 0)
+        # A separate seed parameter is required to make this reproducible
+        Shuffled = new
+      end
+    end
+  end
+end
+end