RubyGems - desiru - Versions diffs - 0.1.0 → 0.2.0 - Mend

desiru 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (110) hide show

checksums.yaml +4 -4
data/.claude/settings.local.json +11 -0
data/.env.example +34 -0
data/.rubocop.yml +7 -4
data/.ruby-version +1 -0
data/CHANGELOG.md +73 -0
data/CLAUDE.local.md +3 -0
data/CLAUDE.md +10 -1
data/Gemfile +21 -2
data/Gemfile.lock +88 -13
data/README.md +301 -2
data/Rakefile +1 -0
data/db/migrations/001_create_initial_tables.rb +96 -0
data/db/migrations/002_create_job_results.rb +39 -0
data/desiru-development-swarm.yml +185 -0
data/desiru.db +0 -0
data/desiru.gemspec +2 -5
data/docs/background_processing_roadmap.md +87 -0
data/docs/job_scheduling.md +167 -0
data/dspy-analysis-swarm.yml +60 -0
data/dspy-feature-analysis.md +121 -0
data/examples/README.md +69 -0
data/examples/api_with_persistence.rb +122 -0
data/examples/assertions_example.rb +232 -0
data/examples/async_processing.rb +2 -0
data/examples/few_shot_learning.rb +1 -2
data/examples/graphql_api.rb +4 -2
data/examples/graphql_integration.rb +3 -3
data/examples/graphql_optimization_summary.md +143 -0
data/examples/graphql_performance_benchmark.rb +247 -0
data/examples/persistence_example.rb +102 -0
data/examples/react_agent.rb +203 -0
data/examples/rest_api.rb +173 -0
data/examples/rest_api_advanced.rb +333 -0
data/examples/scheduled_job_example.rb +116 -0
data/examples/simple_qa.rb +1 -2
data/examples/sinatra_api.rb +109 -0
data/examples/typed_signatures.rb +1 -2
data/graphql_optimization_summary.md +53 -0
data/lib/desiru/api/grape_integration.rb +284 -0
data/lib/desiru/api/persistence_middleware.rb +148 -0
data/lib/desiru/api/sinatra_integration.rb +217 -0
data/lib/desiru/api.rb +42 -0
data/lib/desiru/assertions.rb +74 -0
data/lib/desiru/async_status.rb +65 -0
data/lib/desiru/cache.rb +1 -1
data/lib/desiru/configuration.rb +2 -1
data/lib/desiru/core/compiler.rb +231 -0
data/lib/desiru/core/example.rb +96 -0
data/lib/desiru/core/prediction.rb +108 -0
data/lib/desiru/core/trace.rb +330 -0
data/lib/desiru/core/traceable.rb +61 -0
data/lib/desiru/core.rb +12 -0
data/lib/desiru/errors.rb +160 -0
data/lib/desiru/field.rb +17 -14
data/lib/desiru/graphql/batch_loader.rb +85 -0
data/lib/desiru/graphql/data_loader.rb +242 -75
data/lib/desiru/graphql/enum_builder.rb +75 -0
data/lib/desiru/graphql/executor.rb +37 -4
data/lib/desiru/graphql/schema_generator.rb +62 -158
data/lib/desiru/graphql/type_builder.rb +138 -0
data/lib/desiru/graphql/type_cache_warmer.rb +91 -0
data/lib/desiru/jobs/async_predict.rb +1 -1
data/lib/desiru/jobs/base.rb +67 -0
data/lib/desiru/jobs/batch_processor.rb +6 -6
data/lib/desiru/jobs/retriable.rb +119 -0
data/lib/desiru/jobs/retry_strategies.rb +169 -0
data/lib/desiru/jobs/scheduler.rb +219 -0
data/lib/desiru/jobs/webhook_notifier.rb +242 -0
data/lib/desiru/models/anthropic.rb +164 -0
data/lib/desiru/models/base.rb +37 -3
data/lib/desiru/models/open_ai.rb +151 -0
data/lib/desiru/models/open_router.rb +161 -0
data/lib/desiru/module.rb +67 -9
data/lib/desiru/modules/best_of_n.rb +306 -0
data/lib/desiru/modules/chain_of_thought.rb +3 -3
data/lib/desiru/modules/majority.rb +51 -0
data/lib/desiru/modules/multi_chain_comparison.rb +256 -0
data/lib/desiru/modules/predict.rb +15 -1
data/lib/desiru/modules/program_of_thought.rb +338 -0
data/lib/desiru/modules/react.rb +273 -0
data/lib/desiru/modules/retrieve.rb +4 -2
data/lib/desiru/optimizers/base.rb +32 -4
data/lib/desiru/optimizers/bootstrap_few_shot.rb +2 -2
data/lib/desiru/optimizers/copro.rb +268 -0
data/lib/desiru/optimizers/knn_few_shot.rb +185 -0
data/lib/desiru/optimizers/mipro_v2.rb +889 -0
data/lib/desiru/persistence/database.rb +71 -0
data/lib/desiru/persistence/models/api_request.rb +38 -0
data/lib/desiru/persistence/models/job_result.rb +138 -0
data/lib/desiru/persistence/models/module_execution.rb +37 -0
data/lib/desiru/persistence/models/optimization_result.rb +28 -0
data/lib/desiru/persistence/models/training_example.rb +25 -0
data/lib/desiru/persistence/models.rb +11 -0
data/lib/desiru/persistence/repositories/api_request_repository.rb +98 -0
data/lib/desiru/persistence/repositories/base_repository.rb +77 -0
data/lib/desiru/persistence/repositories/job_result_repository.rb +116 -0
data/lib/desiru/persistence/repositories/module_execution_repository.rb +85 -0
data/lib/desiru/persistence/repositories/optimization_result_repository.rb +67 -0
data/lib/desiru/persistence/repositories/training_example_repository.rb +102 -0
data/lib/desiru/persistence/repository.rb +29 -0
data/lib/desiru/persistence/setup.rb +77 -0
data/lib/desiru/persistence.rb +49 -0
data/lib/desiru/registry.rb +3 -5
data/lib/desiru/signature.rb +91 -24
data/lib/desiru/version.rb +1 -1
data/lib/desiru.rb +33 -8
data/missing-features-analysis.md +192 -0
metadata +75 -45
data/lib/desiru/models/raix_adapter.rb +0 -210

data/lib/desiru/modules/react.rb ADDED Viewed

@@ -0,0 +1,273 @@
+# frozen_string_literal: true
+require_relative '../module'
+require_relative 'chain_of_thought'
+module Desiru
+  module Modules
+    # ReAct (Reasoning and Acting) module for tool-using AI agents
+    # This module allows the language model to iteratively reason about a task
+    # and use tools to gather information before producing a final answer
+    class ReAct < Desiru::Module
+      attr_reader :max_iterations, :tools, :react_module, :extract_module
+      def initialize(signature, tools: [], max_iterations: 5, model: nil)
+        super(signature, model: model)
+        @tools = normalize_tools(tools)
+        @max_iterations = max_iterations
+        # Build the ReAct signature for reasoning and tool selection
+        react_signature = build_react_signature
+        @react_module = ChainOfThought.new(react_signature, model: @model)
+        # Build extraction signature for final output
+        extract_signature = build_extract_signature
+        @extract_module = ChainOfThought.new(extract_signature, model: @model)
+      end
+      def forward(inputs)
+        trajectory = []
+        max_iterations.times do |_iteration|
+          # Get the next action from the model
+          react_inputs = prepare_react_inputs(inputs, trajectory)
+          react_output = react_module.call(react_inputs)
+          # Extract the tool name and arguments
+          tool_name = react_output[:next_tool_name]
+          tool_args = parse_tool_args(react_output[:next_tool_args])
+          # Add reasoning to trajectory
+          trajectory << {
+            thought: react_output[:next_thought],
+            tool: tool_name,
+            args: tool_args
+          }
+          # Check if we're done
+          break if tool_name == "finish"
+          # Execute the tool
+          begin
+            tool_result = execute_tool(tool_name, tool_args)
+            trajectory.last[:observation] = tool_result
+          rescue StandardError => e
+            trajectory.last[:observation] = "Error: #{e.message}"
+          end
+        end
+        # Extract final outputs from trajectory
+        extract_inputs = prepare_extract_inputs(inputs, trajectory)
+        extract_module.call(extract_inputs)
+      end
+      private
+      def normalize_tools(tools)
+        # Convert tools to a consistent format
+        normalized = {}
+        tools.each do |tool|
+          case tool
+          when Hash
+            # Assume hash has name and function keys
+            normalized[tool[:name] || tool["name"]] = tool[:function] || tool["function"]
+          when Array
+            # Assume array of [name, function] pairs
+            name, function = tool
+            normalized[name] = function
+          else
+            # Assume it's a callable with a name method
+            if tool.respond_to?(:name) && tool.respond_to?(:call)
+              normalized[tool.name] = tool
+            elsif tool.is_a?(Method) || tool.is_a?(Proc)
+              # Use the method/proc name or generate one
+              name = tool.respond_to?(:name) ? tool.name.to_s : "tool_#{normalized.size}"
+              normalized[name] = tool
+            end
+          end
+        end
+        # Always include the finish tool
+        normalized["finish"] = -> { "Task completed" }
+        normalized
+      end
+      def build_react_signature
+        # Build signature for reasoning and tool selection
+        input_fields = signature.input_fields.keys.join(", ")
+        # Create the ReAct signature
+        react_sig = "#{input_fields}, trajectory -> next_thought, next_tool_name, next_tool_args"
+        # Add instructions
+        instructions = <<~INST
+          You are an AI agent that can use tools to accomplish tasks.
+          Available tools:
+          #{format_tool_descriptions}
+          Based on the input and trajectory so far, reason about what to do next.
+          Then select a tool to use and provide the arguments for that tool.
+          When you have gathered enough information to answer the question,
+          use the "finish" tool to complete the task.
+        INST
+        Signature.new(react_sig, descriptions: { 'next_thought' => instructions })
+      end
+      def build_extract_signature
+        # Build signature for extracting final outputs
+        input_fields = signature.input_fields.keys.join(", ")
+        output_fields = signature.output_fields.keys.join(", ")
+        extract_sig = "#{input_fields}, trajectory -> #{output_fields}"
+        instructions = <<~INST
+          Based on the trajectory of thoughts and tool observations,
+          extract the final #{output_fields} to answer the original question.
+        INST
+        Signature.new(extract_sig, descriptions: { output_fields => instructions })
+      end
+      def format_tool_descriptions
+        tools.map do |name, function|
+          if name == "finish"
+            "- finish: Mark the task as complete when you have enough information"
+          else
+            # Try to extract description from function
+            desc = if function.respond_to?(:description)
+                     function.description
+                   elsif function.respond_to?(:to_s)
+                     function.to_s
+                   else
+                     "Tool: #{name}"
+                   end
+            "- #{name}: #{desc}"
+          end
+        end.join("\n")
+      end
+      def prepare_react_inputs(inputs, trajectory)
+        inputs.merge(
+          trajectory: format_trajectory(trajectory)
+        )
+      end
+      def prepare_extract_inputs(inputs, trajectory)
+        inputs.merge(
+          trajectory: format_trajectory(trajectory)
+        )
+      end
+      def format_trajectory(trajectory)
+        return "No actions taken yet." if trajectory.empty?
+        trajectory.map.with_index do |step, i|
+          parts = ["Step #{i + 1}:"]
+          parts << "Thought: #{step[:thought]}" if step[:thought]
+          parts << "Tool: #{step[:tool]}" if step[:tool]
+          parts << "Args: #{step[:args]}" if step[:args] && !step[:args].empty?
+          parts << "Observation: #{step[:observation]}" if step[:observation]
+          parts.join("\n")
+        end.join("\n\n")
+      end
+      def parse_tool_args(args_string)
+        # Parse tool arguments from string format
+        return {} if args_string.nil? || args_string.strip.empty?
+        # Try to parse as JSON first
+        begin
+          require 'json'
+          JSON.parse(args_string, symbolize_names: true)
+        rescue JSON::ParserError
+          # Fallback: parse simple key:value pairs
+          parse_simple_args(args_string)
+        end
+      end
+      def parse_simple_args(args_string)
+        # Parse simple key:value format
+        args = {}
+        # Match patterns like key:value or key=value
+        args_string.scan(/(\w+)[:=]\s*([^,]+)/).each do |key, value|
+          # Clean up the value
+          value = value.strip.gsub(/^["']|["']$/, '') # Remove quotes
+          # Try to convert to appropriate type
+          args[key.to_sym] = case value.downcase
+                             when 'true' then true
+                             when 'false' then false
+                             when /^\d+$/ then value.to_i
+                             when /^\d+\.\d+$/ then value.to_f
+                             else value
+                             end
+        end
+        args
+      end
+      def execute_tool(tool_name, args)
+        tool = tools[tool_name]
+        raise "Unknown tool: #{tool_name}" unless tool
+        # Call the tool with arguments
+        if tool.arity.zero?
+          tool.call
+        elsif tool.arity == 1 && args.is_a?(Hash)
+          # Pass args as keyword arguments if possible
+          if tool.respond_to?(:parameters)
+            param_types = tool.parameters.map(&:first)
+            if param_types.include?(:keyreq) || param_types.include?(:key)
+              tool.call(**args)
+            else
+              tool.call(args)
+            end
+          else
+            tool.call(args)
+          end
+        else
+          # Pass args as positional arguments
+          tool.call(*args.values)
+        end
+      end
+      # Support for truncating trajectory if it gets too long
+      def truncate_trajectory(trajectory, max_length: 3000)
+        formatted = format_trajectory(trajectory)
+        return trajectory if formatted.length <= max_length
+        # Remove oldest steps until we're under the limit
+        truncated = trajectory.dup
+        # Keep removing the oldest steps until we're under the limit
+        while truncated.length > 1
+          truncated_formatted = format_trajectory(truncated)
+          break if truncated_formatted.length <= max_length
+          truncated.shift
+        end
+        # If even a single step is too long, truncate its content
+        if truncated.length == 1 && format_trajectory(truncated).length > max_length
+          step = truncated[0]
+          # Truncate the observation if it exists and is long
+          if step[:observation] && step[:observation].length > 100
+            step[:observation] = "#{step[:observation][0..100]}... (truncated)"
+          end
+          # Truncate thought if it's very long
+          step[:thought] = "#{step[:thought][0..100]}... (truncated)" if step[:thought] && step[:thought].length > 100
+        end
+        truncated
+      end
+    end
+  end
+end

data/lib/desiru/modules/retrieve.rb CHANGED Viewed

@@ -21,6 +21,7 @@ module Desiru
       def forward(**inputs)
         query = inputs[:query]
         # Handle k parameter - it might come as nil if optional
+        # Note: 'k' is the standard parameter name in information retrieval
         k = inputs.fetch(:k, 5)
         k = 5 if k.nil? # Ensure we have a value even if nil was passed
@@ -67,7 +68,7 @@ module Desiru
         raise NotImplementedError, 'Subclasses must implement #add'
       end
-      def search(_query, k: 5)
+      def search(_query, k: 5) # rubocop:disable Naming/MethodParameterName
         raise NotImplementedError, 'Subclasses must implement #search'
       end
@@ -83,6 +84,7 @@ module Desiru
     # In-memory backend implementation for development and testing
     class InMemoryBackend < Backend
       def initialize(distance_metric: :cosine)
+        super()
         @documents = []
         @embeddings = []
         @distance_metric = distance_metric
@@ -107,7 +109,7 @@ module Desiru
         @embeddings.concat(embeddings)
       end
-      def search(query, k: 5)
+      def search(query, k: 5) # rubocop:disable Naming/MethodParameterName
         return [] if @documents.empty?
         # Generate query embedding

data/lib/desiru/optimizers/base.rb CHANGED Viewed

@@ -22,7 +22,21 @@ module Desiru
       def evaluate(program, dataset)
         scores = dataset.map do |example|
-          prediction = program.call(example.reject { |k, _| %i[answer output].include?(k) })
+          # Extract inputs (exclude answer/output fields)
+          inputs = {}
+          if example.respond_to?(:to_h)
+            example.to_h.each do |k, v|
+              inputs[k] = v unless %i[answer output].include?(k)
+            end
+          elsif example.is_a?(Hash)
+            example.each do |k, v|
+              inputs[k] = v unless %i[answer output].include?(k.to_sym)
+            end
+          else
+            inputs = example
+          end
+          prediction = program.call(inputs)
           score_prediction(prediction, example)
         end
@@ -55,6 +69,10 @@ module Desiru
           f1_score(prediction, ground_truth)
         when :accuracy
           accuracy_score(prediction, ground_truth)
+        when :confidence
+          confidence_score(prediction, ground_truth)
+        when :consistency
+          consistency_score(prediction, ground_truth)
         else
           raise OptimizerError, "Unknown metric: #{@metric}"
         end
@@ -86,13 +104,23 @@ module Desiru
         exact_match_score(prediction, ground_truth)
       end
+      def confidence_score(prediction, ground_truth)
+        # Placeholder confidence metric: linearly rescales the exact-match score (score * 0.9 + 0.1)
+        # NOTE(review): a real implementation would use model confidence scores instead
+        (exact_match_score(prediction, ground_truth) * 0.9) + 0.1
+      end
+      def consistency_score(prediction, ground_truth)
+        # Placeholder consistency metric: linearly rescales the exact-match score (score * 0.8 + 0.2)
+        # NOTE(review): a real implementation would track consistency across examples instead
+        (exact_match_score(prediction, ground_truth) * 0.8) + 0.2
+      end
       def extract_answer(data)
         case data
-        when ModuleResult, ProgramResult
+        when ModuleResult, ProgramResult, Hash
           # Try common answer fields
           data[:answer] || data[:output] || data[:result] || data.values.first
-        when Hash
-          data[:answer] || data[:output] || data[:result] || data.values.first
         else
           data
         end

data/lib/desiru/optimizers/bootstrap_few_shot.rb CHANGED Viewed

@@ -80,7 +80,7 @@ module Desiru
           begin
             # Get module prediction
-            inputs = example.reject { |k, _| %i[answer output].include?(k) }
+            inputs = example.except(:answer, :output)
             prediction = module_instance.call(inputs)
             # Score the prediction
@@ -110,7 +110,7 @@ module Desiru
         # Add labeled examples if available
         labeled = examples.select { |ex| ex[:answer] || ex[:output] }
         labeled_demos = labeled.first(config[:max_labeled_demos]).map do |ex|
-          inputs = ex.reject { |k, _| %i[answer output].include?(k) }
+          inputs = ex.except(:answer, :output)
           {
             input: format_demo_input(inputs),
             output: format_demo_output(ex),

data/lib/desiru/optimizers/copro.rb ADDED Viewed

@@ -0,0 +1,268 @@
+# frozen_string_literal: true
+module Desiru
+  module Optimizers
+    # COPRO (Cooperative Prompt Optimization) optimizer
+    # Generates and refines instructions for each module using coordinate ascent
+    class COPRO < Base
+      def initialize(config = {})
+        super
+        @max_iterations = config[:max_iterations] || 10
+        @num_candidates = config[:num_candidates] || 5
+        @temperature = config[:temperature] || 0.7
+        @improvement_threshold = config[:improvement_threshold] || 0.01
+      end
+      def compile(program, trainset, valset = nil, **kwargs)
+        valset ||= trainset # Use trainset for validation if no valset provided
+        # Initialize best score
+        best_score = evaluate_program(program, valset, kwargs[:metric])
+        best_program = program.dup
+        Desiru.logger.info("[COPRO] Initial score: #{best_score}")
+        # Iterate through optimization rounds
+        @max_iterations.times do |iteration|
+          Desiru.logger.info("[COPRO] Starting iteration #{iteration + 1}/#{@max_iterations}")
+          # Try to improve each predictor
+          improved = false
+          program.predictors.each do |name, predictor|
+            Desiru.logger.info("[COPRO] Optimizing predictor: #{name}")
+            # Generate instruction candidates
+            candidates = generate_instruction_candidates(predictor, trainset, name)
+            # Evaluate each candidate
+            best_candidate_score = best_score
+            best_candidate_instruction = nil
+            candidates.each do |instruction|
+              # Create program with new instruction
+              candidate_program = create_program_with_instruction(
+                best_program,
+                name,
+                instruction
+              )
+              # Evaluate
+              score = evaluate_program(candidate_program, valset, kwargs[:metric])
+              if score > best_candidate_score
+                best_candidate_score = score
+                best_candidate_instruction = instruction
+              end
+            end
+            # Update if improved
+            next unless best_candidate_instruction && (best_candidate_score - best_score) > @improvement_threshold
+            Desiru.logger.info("[COPRO] Improved #{name}: #{best_score} -> #{best_candidate_score}")
+            best_program = create_program_with_instruction(
+              best_program,
+              name,
+              best_candidate_instruction
+            )
+            best_score = best_candidate_score
+            improved = true
+          end
+          # Early stopping if no improvement
+          break unless improved
+        end
+        Desiru.logger.info("[COPRO] Final score: #{best_score}")
+        best_program
+      end
+      private
+      def generate_instruction_candidates(predictor, trainset, predictor_name)
+        candidates = []
+        # Get examples of good performance
+        good_examples = select_good_examples(predictor, trainset)
+        # Generate initial instruction based on signature
+        signature = predictor.signature
+        base_instruction = generate_base_instruction(signature, predictor_name)
+        candidates << base_instruction
+        # Generate variations
+        (@num_candidates - 1).times do |i|
+          variation_prompt = build_variation_prompt(
+            base_instruction,
+            signature,
+            good_examples,
+            i
+          )
+          response = model.complete(
+            messages: [{ role: 'user', content: variation_prompt }],
+            temperature: @temperature
+          )
+          instruction = extract_instruction(response[:content])
+          candidates << instruction if instruction
+        end
+        candidates.compact.uniq
+      end
+      def generate_base_instruction(signature, predictor_name)
+        instruction = "You are solving a #{predictor_name} task.\n\n"
+        # Add input description
+        if signature.input_fields.any?
+          instruction += "Given the following inputs:\n"
+          signature.input_fields.each do |name, field|
+            instruction += "- #{name}: #{field.description || field.type}\n"
+          end
+          instruction += "\n"
+        end
+        # Add output description
+        if signature.output_fields.any?
+          instruction += "Produce the following outputs:\n"
+          signature.output_fields.each do |name, field|
+            instruction += "- #{name}: #{field.description || field.type}\n"
+          end
+        end
+        instruction
+      end
+      def build_variation_prompt(base_instruction, signature, good_examples, variation_index)
+        prompt = "Improve the following instruction for better performance:\n\n"
+        prompt += "Current instruction:\n#{base_instruction}\n\n"
+        # Add task context
+        prompt += "Task signature: #{signature}\n\n"
+        # Add examples of good performance
+        if good_examples.any?
+          prompt += "Examples of successful completions:\n"
+          good_examples.take(3).each do |example|
+            prompt += format_example(example)
+          end
+        end
+        # Request specific type of improvement
+        improvement_types = [
+          "Make the instruction more specific and detailed",
+          "Add helpful constraints or guidelines",
+          "Clarify any ambiguous requirements",
+          "Add examples or patterns to follow",
+          "Emphasize important aspects of the task"
+        ]
+        prompt += "\n#{improvement_types[variation_index % improvement_types.length]}.\n"
+        prompt += "Provide only the improved instruction:"
+        prompt
+      end
+      def select_good_examples(predictor, trainset)
+        good_examples = []
+        trainset.each do |example|
+          # Run predictor on example inputs
+          result = predictor.call(example[:inputs])
+          # Check if output matches expected
+          good_examples << example if outputs_match?(result, example[:outputs])
+        rescue StandardError
+          # Skip failed examples
+        end
+        good_examples
+      end
+      def outputs_match?(actual, expected)
+        return false unless actual.is_a?(Hash) && expected.is_a?(Hash)
+        expected.all? do |key, expected_value|
+          actual_value = actual[key]
+          # Flexible matching for different types
+          case expected_value
+          when String
+            actual_value.to_s.strip.downcase == expected_value.strip.downcase
+          when Numeric
+            (actual_value.to_f - expected_value.to_f).abs < 0.001
+          else
+            actual_value == expected_value
+          end
+        end
+      end
+      def format_example(example)
+        formatted = "\nExample:\n"
+        if example[:inputs]
+          formatted += "Inputs: "
+          formatted += example[:inputs].map { |k, v| "#{k}=#{v}" }.join(", ")
+          formatted += "\n"
+        end
+        if example[:outputs]
+          formatted += "Outputs: "
+          formatted += example[:outputs].map { |k, v| "#{k}=#{v}" }.join(", ")
+          formatted += "\n"
+        end
+        formatted
+      end
+      def extract_instruction(response)
+        # Clean up the response
+        instruction = response.strip
+        # Remove any meta-commentary
+        instruction = instruction.sub(/^(Here's |This is )?the improved instruction:?\s*/i, '')
+        instruction = instruction.sub(/^Improved instruction:?\s*/i, '')
+        # Remove quotes if wrapped
+        instruction.gsub(/^["']|["']$/, '')
+      end
+      def create_program_with_instruction(program, predictor_name, instruction)
+        new_program = program.dup
+        # Get the predictor
+        predictor = new_program.predictors[predictor_name]
+        return new_program unless predictor
+        # Create new predictor with updated instruction
+        new_predictor = predictor.dup
+        new_predictor.instance_variable_set(:@instruction, instruction)
+        # Update the program
+        new_program.instance_variable_set("@#{predictor_name}", new_predictor)
+        new_program
+      end
+      def evaluate_program(program, dataset, metric)
+        scores = []
+        dataset.each do |example|
+          # Run program
+          prediction = program.forward(**example[:inputs])
+          # Calculate score
+          score = metric.call(prediction, example[:outputs])
+          scores << score
+        rescue StandardError => e
+          Desiru.logger.debug("[COPRO] Evaluation error: #{e.message}")
+          scores << 0.0
+        end
+        # Return average score
+        scores.empty? ? 0.0 : scores.sum.to_f / scores.length
+      end
+    end
+  end
+end