RubyGems - desiru - Versions diffs - 0.1.1 → 0.2.0 - Mend

desiru 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

checksums.yaml +4 -4
data/.claude/settings.local.json +11 -0
data/CHANGELOG.md +73 -0
data/CLAUDE.local.md +3 -0
data/CLAUDE.md +6 -1
data/Gemfile.lock +1 -1
data/README.md +7 -1
data/desiru-development-swarm.yml +185 -0
data/lib/desiru/core/compiler.rb +231 -0
data/lib/desiru/core/example.rb +96 -0
data/lib/desiru/core/prediction.rb +108 -0
data/lib/desiru/core/trace.rb +330 -0
data/lib/desiru/core/traceable.rb +61 -0
data/lib/desiru/core.rb +12 -0
data/lib/desiru/module.rb +8 -0
data/lib/desiru/modules/best_of_n.rb +306 -0
data/lib/desiru/modules/multi_chain_comparison.rb +72 -20
data/lib/desiru/modules/predict.rb +7 -0
data/lib/desiru/modules/program_of_thought.rb +227 -28
data/lib/desiru/optimizers/base.rb +31 -1
data/lib/desiru/optimizers/mipro_v2.rb +889 -0
data/lib/desiru/persistence/repositories/base_repository.rb +1 -1
data/lib/desiru/version.rb +1 -1
data/lib/desiru.rb +10 -0
metadata +13 -1

data/lib/desiru/modules/best_of_n.rb ADDED Viewed

@@ -0,0 +1,306 @@
+# frozen_string_literal: true
+module Desiru
+  module Modules
+    # BestOfN module that samples N outputs from a predictor and selects the best one
+    # based on configurable criteria (confidence, consistency, or external validation)
+    class BestOfN < Desiru::Module
+      SELECTION_CRITERIA = %i[confidence consistency llm_judge custom].freeze
+      DEFAULT_SIGNATURE = 'question: string -> answer: string'
+      # Build a BestOfN module.
+      #
+      # @param signature [Object, nil] signature to sample against; DEFAULT_SIGNATURE when omitted
+      # @param model [Object, nil] model passed on to the parent initializer
+      # @param kwargs [Hash] BestOfN options (:n_samples, :selection_criterion, :temperature,
+      #   :custom_selector, :base_module, :include_metadata); every other key goes to the parent
+      # @raise [ArgumentError] when :selection_criterion is not listed in SELECTION_CRITERIA
+      def initialize(signature = nil, model: nil, **kwargs)
+        # Remove the BestOfN-only keys from kwargs before the bare `super` below,
+        # which re-sends the current signature, model and remaining kwargs.
+        requested_criterion = kwargs.delete(:selection_criterion) || :consistency
+        @selection_criterion = validate_criterion(requested_criterion)
+        @n_samples = kwargs.delete(:n_samples) || 5
+        @temperature = kwargs.delete(:temperature) || 0.7
+        @custom_selector = kwargs.delete(:custom_selector) # callable that receives the array of samples
+        @base_module = kwargs.delete(:base_module) || Modules::Predict
+        @include_metadata = kwargs.delete(:include_metadata) || false
+        # Fall back to the default signature when the caller gave none
+        signature ||= DEFAULT_SIGNATURE
+        super
+      end
+      # Sample N candidates, select the best one and return it.
+      #
+      # @param inputs [Hash] keyword inputs handed to the sample generator
+      # @return [Hash] the selected sample, with :selection_metadata added when
+      #   @include_metadata is set or the signature declares a selection_metadata output
+      # @raise [ArgumentError] re-raised untouched (e.g. a missing custom selector)
+      def forward(**inputs)
+        samples = generate_samples(inputs)
+        best_sample = select_best(samples, inputs)
+        # Output field names may be stored as strings or symbols, so accept both spellings
+        wants_metadata = @include_metadata ||
+                         [:selection_metadata, 'selection_metadata'].any? do |field|
+                           signature.output_fields.key?(field)
+                         end
+        best_sample[:selection_metadata] = build_metadata(samples, best_sample) if wants_metadata
+        # :_confidence_score is internal bookkeeping; build_metadata has already read it
+        best_sample.delete(:_confidence_score)
+        best_sample
+      rescue ArgumentError
+        # Configuration errors must reach the caller instead of triggering the fallback
+        raise
+      rescue StandardError => e
+        Desiru.logger.error("BestOfN error: #{e.message}")
+        # Any other failure degrades to a single plain sample
+        fallback_sample(inputs)
+      end
+      private
+      # Check a selection criterion against SELECTION_CRITERIA.
+      #
+      # @param criterion [Symbol] requested criterion
+      # @return [Symbol] the same criterion when it is supported
+      # @raise [ArgumentError] when it is not listed in SELECTION_CRITERIA
+      def validate_criterion(criterion)
+        return criterion if SELECTION_CRITERIA.include?(criterion)
+        raise ArgumentError, "Invalid selection criterion: #{criterion}. " \
+                             "Must be one of: #{SELECTION_CRITERIA.join(', ')}"
+      end
+      # Draw @n_samples candidate outputs from the base module.
+      #
+      # @param inputs [Hash] keyword inputs for the base module
+      # @return [Array] one result per sample, in generation order
+      def generate_samples(inputs)
+        # @base_module is either a class (instantiated with our signature and model)
+        # or an already-built object that answers #forward or #call.
+        sampler = @base_module.is_a?(Class) ? @base_module.new(signature, model: model) : @base_module
+        @n_samples.times.each_with_object([]) do |index, collected|
+          # Tag each call with its index so repeated calls do not share identical inputs
+          tagged_inputs = inputs.merge(_sample_index: index)
+          # Capture the model's current temperature (read from its ivar) for the ensure below
+          saved_temp = (model.instance_variable_get(:@temperature) if model.respond_to?(:instance_variable_get))
+          begin
+            # Switch to the sampling temperature when the model exposes a setter
+            model.temperature = @temperature if model.respond_to?(:temperature=)
+            result = if sampler.respond_to?(:forward)
+                       sampler.forward(**tagged_inputs)
+                     else
+                       sampler.call(**tagged_inputs)
+                     end
+            # The index tag is internal; drop it in case the module echoed it back
+            result.delete(:_sample_index)
+            collected << result
+          ensure
+            # Put the previous temperature back, but only when a truthy value was captured
+            model.temperature = saved_temp if model.respond_to?(:temperature=) && saved_temp
+          end
+        end
+      end
+      # Dispatch to the selection strategy named by @selection_criterion.
+      #
+      # @param samples [Array] candidate outputs
+      # @param inputs [Hash] original inputs (only the LLM judge uses them)
+      # @return [Object] the chosen sample
+      def select_best(samples, inputs)
+        case @selection_criterion
+        when :confidence then select_by_confidence(samples)
+        when :consistency then select_by_consistency(samples)
+        when :llm_judge then select_by_llm_judge(samples, inputs)
+        when :custom then select_by_custom(samples)
+        else samples.first # Fallback for an unrecognized criterion
+        end
+      end
+      # Score every sample with calculate_confidence and keep the top scorer.
+      #
+      # @param samples [Array<Hash>] candidate outputs
+      # @return [Hash, nil] copy of the best sample carrying :_confidence_score
+      #   (the caller uses it for metadata); nil when there are no samples
+      def select_by_confidence(samples)
+        scored = samples.map do |candidate|
+          candidate.merge(_confidence_score: calculate_confidence(candidate))
+        end
+        scored.max_by { |candidate| candidate[:_confidence_score] }
+      end
+      # Ask the model to rate one sample on a 0-100 scale.
+      #
+      # @param sample [Hash] candidate output; keys starting with "_" are not shown to the model
+      # @return [Integer] first integer found in the reply, clamped to 0..100;
+      #   50 when the reply is missing or contains no digits
+      def calculate_confidence(sample)
+        visible_fields = sample.reject { |key, _value| key.to_s.start_with?('_') }
+        field_lines = visible_fields.map { |key, value| "#{key}: #{value}\n" }.join
+        prompt = "Rate the confidence (0-100) for this response:\n\n" \
+                 "#{field_lines}" \
+                 "\nProvide only a number between 0 and 100:"
+        response = model.complete(
+          messages: [{ role: 'user', content: prompt }],
+          temperature: 0.1
+        )
+        # to_s guards against a nil :content so a blank reply yields the neutral default
+        first_number = response[:content].to_s[/\d+/]
+        return 50 unless first_number
+        first_number.to_i.clamp(0, 100)
+      end
+      # Pick the answer that the largest number of samples agree on.
+      #
+      # Samples are grouped by the normalized value of the main output field (the
+      # first output field that is neither "_"-prefixed nor selection_metadata).
+      #
+      # @param samples [Array<Hash>] candidate outputs
+      # @return [Hash, nil] representative of the largest agreeing group; the first
+      #   sample when there is no main field or no sample carries a value for it
+      def select_by_consistency(samples)
+        main_field = signature.output_fields.keys.find do |name|
+          !name.to_s.start_with?('_') && name.to_s != 'selection_metadata'
+        end
+        return samples.first unless main_field
+        # Field names may be strings while sample keys are symbols
+        field_sym = main_field.to_sym
+        groups = samples.select { |sample| sample[field_sym] }
+                        .group_by { |sample| normalize_output(sample[field_sym]) }
+        # Nothing to vote on: avoid handing nil to select_centroid
+        return samples.first if groups.empty?
+        largest_group = groups.values.max_by(&:length)
+        select_centroid(largest_group)
+      end
+      # Reduce an output value to a canonical form so equivalent answers compare equal.
+      #
+      # - String: lower-cased, stripped, all punctuation removed
+      # - Numeric: rounded to two decimal places
+      # - Array: elements normalized, then put in a canonical order
+      # - Hash: values normalized, keys untouched
+      # - anything else: its to_s
+      #
+      # @param value [Object] raw output value
+      # @return [Object] canonical grouping key
+      def normalize_output(value)
+        case value
+        when String
+          value.downcase.strip.gsub(/[[:punct:]]/, '')
+        when Numeric
+          value.round(2)
+        when Array
+          # Order by #inspect rather than <=>: normalized elements may be mixed or
+          # incomparable (strings with numbers, hashes, "" from nil), which makes
+          # Array#sort raise ArgumentError.
+          value.map { |element| normalize_output(element) }.sort_by(&:inspect)
+        when Hash
+          value.transform_values { |element| normalize_output(element) }
+        else
+          value.to_s
+        end
+      end
+      # Choose a representative member of a group of agreeing samples.
+      #
+      # Currently this is simply the element at index length / 2; no similarity
+      # metric is applied yet.
+      #
+      # @param group [Array] samples sharing the same normalized answer
+      # @return [Object, nil] the middle element (nil for an empty group)
+      def select_centroid(group)
+        size = group.length
+        size == 1 ? group.first : group[size / 2]
+      end
+      # Let the model act as judge and pick one of the samples.
+      #
+      # @param samples [Array<Hash>] candidate outputs; keys starting with "_" are hidden from the judge
+      # @param inputs [Hash] the original inputs, shown to the judge for context
+      # @return [Hash, nil] the sample named in the reply ("option N", 1-based, clamped
+      #   to the valid range); the first sample when no option number is found;
+      #   nil when there are no samples
+      def select_by_llm_judge(samples, inputs)
+        # With nothing to judge, skip the model call; clamp(0, -1) below would
+        # otherwise raise ArgumentError.
+        return if samples.empty?
+        input_lines = inputs.map { |key, value| "  #{key}: #{value}\n" }.join
+        option_blocks = samples.each_with_index.map do |sample, index|
+          visible = sample.reject { |key, _value| key.to_s.start_with?('_') }
+          fields = visible.map { |key, value| "#{key}: #{value}\n" }.join
+          "\n--- Option #{index + 1} ---\n#{fields}"
+        end
+        judge_prompt = "Given the following input and multiple response options, " \
+                       "select the best response:\n\n" \
+                       "Input:\n#{input_lines}" \
+                       "\nResponse Options:\n#{option_blocks.join}" \
+                       "\nSelect the best option (1-#{samples.length}) and briefly explain why:"
+        response = model.complete(
+          messages: [{ role: 'user', content: judge_prompt }],
+          temperature: 0.1
+        )
+        # to_s guards against a nil :content; an unparseable reply selects option 1
+        verdict = response[:content].to_s.match(/option\s*#?(\d+)/i)
+        chosen_index = verdict ? verdict[1].to_i - 1 : 0
+        samples[chosen_index.clamp(0, samples.length - 1)]
+      end
+      # Delegate the choice to the user-supplied selector.
+      #
+      # @param samples [Array] candidate outputs
+      # @return [Object] whatever the selector returns, or the first sample when it returns nil/false
+      # @raise [ArgumentError] when @custom_selector does not respond to #call
+      def select_by_custom(samples)
+        if @custom_selector.respond_to?(:call)
+          chosen = @custom_selector.call(samples)
+          return chosen || samples.first
+        end
+        raise ArgumentError, "Custom selector must be provided when using :custom criterion"
+      end
+      # Describe how the winning sample was chosen.
+      #
+      # @param samples [Array<Hash>] every generated sample
+      # @param selected [Hash] the sample that won
+      # @return [Hash] :total_samples, :selection_criterion and :temperature, plus
+      #   :agreement_rate (consistency) or :selected_confidence (confidence) when available
+      def build_metadata(samples, selected)
+        details = {
+          total_samples: samples.length,
+          selection_criterion: @selection_criterion,
+          temperature: @temperature
+        }
+        if @selection_criterion == :consistency
+          # Main output field: first one that is neither "_"-prefixed nor selection_metadata
+          primary = signature.output_fields.keys.find do |name|
+            !name.to_s.start_with?('_') && name.to_s != 'selection_metadata'
+          end
+          # Field names may be strings while sample keys are symbols
+          key = primary&.to_sym
+          if key && selected[key]
+            target = normalize_output(selected[key])
+            # Share of samples whose normalized answer equals the winner's
+            matching = samples.count { |sample| normalize_output(sample[key]) == target }
+            details[:agreement_rate] = matching.to_f / samples.length
+          end
+        elsif @selection_criterion == :confidence
+          score = selected[:_confidence_score]
+          details[:selected_confidence] = score if score
+        end
+        details
+      end
+      # Produce one plain sample from the base module; used when best-of-N fails.
+      #
+      # @param inputs [Hash] keyword inputs for the base module
+      # @return [Object] whatever the base module's #forward (or #call) returns
+      def fallback_sample(inputs)
+        # A class is instantiated with our signature and model; anything else is used as-is
+        producer = @base_module.is_a?(Class) ? @base_module.new(signature, model: model) : @base_module
+        return producer.forward(**inputs) if producer.respond_to?(:forward)
+        producer.call(**inputs)
+      end
+    end
+  end
+end
+# Register in the main module namespace for convenience:
+# Desiru::BestOfN refers to the same class as Desiru::Modules::BestOfN.
+module Desiru
+  BestOfN = Modules::BestOfN
+end

data/lib/desiru/modules/multi_chain_comparison.rb CHANGED Viewed

@@ -5,14 +5,25 @@ module Desiru
     # MultiChainComparison module that generates multiple chain-of-thought
     # reasoning paths and compares them to produce the best answer
     class MultiChainComparison < Desiru::Module
+      DEFAULT_SIGNATURE = 'question: string -> answer: string, reasoning: string'
       def initialize(signature = nil, model: nil, **kwargs)
+        # Extract our specific options before passing to parent
+        @num_chains = kwargs.delete(:num_chains) || 3
+        @comparison_strategy = kwargs.delete(:comparison_strategy) || :vote
+        @temperature = kwargs.delete(:temperature) || 0.7
+        # Use default signature if none provided
+        signature ||= DEFAULT_SIGNATURE
+        # Pass remaining kwargs to parent (config, demos, metadata)
         super
-        @num_chains = kwargs[:num_chains] || 3
-        @comparison_strategy = kwargs[:comparison_strategy] || :vote
-        @temperature = kwargs[:temperature] || 0.7
       end
       def forward(**inputs)
+        # Handle edge case of zero chains
+        return {} if @num_chains <= 0
         # Generate multiple reasoning chains
         chains = generate_chains(inputs)
@@ -25,11 +36,14 @@ module Desiru
                       when :confidence
                         select_by_confidence(chains)
                       else
-                        chains.first # Fallback to first chain
+                        chains.first || {} # Fallback to first chain or empty hash
                       end
+        # Ensure best_result is not nil
+        best_result ||= {}
         # Include comparison metadata if requested
-        if signature.output_fields.key?(:comparison_data)
+        if signature.output_fields.key?('comparison_data') || signature.output_fields.key?(:comparison_data)
           best_result[:comparison_data] = {
             num_chains: chains.length,
             strategy: @comparison_strategy,
@@ -77,7 +91,7 @@ module Desiru
         if signature.output_fields.any?
           prompt += "\nMake sure your answer includes:\n"
           signature.output_fields.each do |name, field|
-            next if %i[reasoning comparison_data].include?(name)
+            next if %w[reasoning comparison_data].include?(name.to_s)
             prompt += "- #{name}: #{field.description || field.type}\n"
           end
@@ -95,15 +109,33 @@ module Desiru
         # Extract answer
         answer_match = response.match(/ANSWER:\s*(.+)/mi)
-        answer_text = answer_match ? answer_match[1].strip : ""
-        # Try to parse structured answer
-        if answer_text.include?(':') || answer_text.include?('{')
-          result.merge!(parse_structured_answer(answer_text))
+        if answer_match
+          answer_text = answer_match[1].strip
+          # Try to parse structured answer
+          if answer_text.include?(':') || answer_text.include?('{')
+            result.merge!(parse_structured_answer(answer_text))
+          elsif !answer_text.empty?
+            # Single value answer
+            main_output_field = signature.output_fields.keys.map(&:to_sym).find do |k|
+              !%i[reasoning comparison_data].include?(k)
+            end
+            result[main_output_field] = answer_text if main_output_field
+          end
         else
-          # Single value answer
-          main_output_field = signature.output_fields.keys.find { |k| !%i[reasoning comparison_data].include?(k) }
-          result[main_output_field] = answer_text if main_output_field
+          # No ANSWER: section found - check if we should extract from reasoning
+          signature.output_fields.keys.map(&:to_sym).find do |k|
+            !%i[reasoning comparison_data].include?(k)
+          end
+          # Don't set the field if there's no clear answer
+          # result[main_output_field] = nil if main_output_field
+        end
+        # Parse any additional fields that might be in the response
+        response.scan(/(\w+):\s*([^\n]+)/).each do |key, value|
+          key_sym = key.downcase.to_sym
+          result[key_sym] = value.strip if signature.output_fields.key?(key_sym) && !result.key?(key_sym)
         end
         result
@@ -115,31 +147,42 @@ module Desiru
         # Try to parse as key-value pairs
         answer_text.scan(/(\w+):\s*([^\n,}]+)/).each do |key, value|
           key_sym = key.downcase.to_sym
-          parsed[key_sym] = value.strip if signature.output_fields.key?(key_sym)
+          if signature.output_fields.key?(key_sym) || signature.output_fields.key?(key.downcase)
+            parsed[key_sym] =
+              value.strip
+          end
         end
         parsed
       end
       def vote_on_chains(chains)
+        return {} if chains.empty?
         # Count votes for each unique answer
         votes = Hash.new(0)
         answer_to_chain = {}
         chains.each do |chain|
           # Get the main answer field (first non-metadata field)
-          answer_key = signature.output_fields.keys.find { |k| !%i[reasoning comparison_data].include?(k) }
+          answer_key = signature.output_fields.keys.map(&:to_sym).find do |k|
+            !%i[reasoning comparison_data].include?(k)
+          end
           answer_value = chain[answer_key]
-          if answer_value
+          if answer_value && !answer_value.to_s.empty?
             votes[answer_value] += 1
             answer_to_chain[answer_value] ||= chain
           end
         end
         # Return the chain with the most common answer
-        winning_answer = votes.max_by { |_, count| count }&.first
-        answer_to_chain[winning_answer] || chains.first
+        if votes.empty?
+          chains.first || {}
+        else
+          winning_answer = votes.max_by { |_, count| count }.first
+          answer_to_chain[winning_answer] || chains.first || {}
+        end
       end
       def llm_judge_chains(chains, original_inputs)
@@ -157,7 +200,9 @@ module Desiru
           judge_prompt += "\n--- Attempt #{i + 1} ---\n"
           judge_prompt += "Reasoning: #{chain[:reasoning]}\n"
-          answer_key = signature.output_fields.keys.find { |k| !%i[reasoning comparison_data].include?(k) }
+          answer_key = signature.output_fields.keys.map(&:to_sym).find do |k|
+            !%i[reasoning comparison_data].include?(k)
+          end
           judge_prompt += "Answer: #{chain[answer_key]}\n" if chain[answer_key]
         end
@@ -182,7 +227,9 @@ module Desiru
           confidence_prompt = "Rate your confidence (0-100) in this reasoning and answer:\n"
           confidence_prompt += "Reasoning: #{chain[:reasoning]}\n"
-          answer_key = signature.output_fields.keys.find { |k| !%i[reasoning comparison_data].include?(k) }
+          answer_key = signature.output_fields.keys.map(&:to_sym).find do |k|
+            !%i[reasoning comparison_data].include?(k)
+          end
           confidence_prompt += "Answer: #{chain[answer_key]}\n" if chain[answer_key]
           confidence_prompt += "\nRespond with just a number between 0 and 100:"
@@ -202,3 +249,8 @@ module Desiru
     end
   end
 end
+# Register in the main module namespace for convenience
+module Desiru
+  MultiChainComparison = Modules::MultiChainComparison
+end

data/lib/desiru/modules/predict.rb CHANGED Viewed

@@ -4,6 +4,13 @@ module Desiru
   module Modules
     # Basic prediction module - the fundamental building block
     class Predict < Module
+      DEFAULT_SIGNATURE = 'question: string -> answer: string'
+      # Create a Predict module, using DEFAULT_SIGNATURE when no signature is given.
+      #
+      # @param signature [Object, nil] signature to use; replaced by DEFAULT_SIGNATURE when nil or false
+      # @param model [Object, nil] passed on to the parent initializer
+      def initialize(signature = nil, model: nil, **)
+        signature ||= DEFAULT_SIGNATURE
+        # Bare `super` re-sends this method's arguments, including the reassigned signature
+        super
+      end
       def forward(inputs)
         prompt = build_prompt(inputs)