RubyGems - dspy - Versions diffs - 0.2.0 → 0.3.0 - Mend

dspy 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

checksums.yaml +4 -4
data/README.md +357 -248
data/lib/dspy/chain_of_thought.rb +151 -11
data/lib/dspy/instrumentation/token_tracker.rb +54 -0
data/lib/dspy/instrumentation.rb +100 -0
data/lib/dspy/lm/adapter.rb +41 -0
data/lib/dspy/lm/adapter_factory.rb +59 -0
data/lib/dspy/lm/adapters/anthropic_adapter.rb +96 -0
data/lib/dspy/lm/adapters/openai_adapter.rb +53 -0
data/lib/dspy/lm/adapters/ruby_llm_adapter.rb +81 -0
data/lib/dspy/lm/errors.rb +10 -0
data/lib/dspy/lm/response.rb +28 -0
data/lib/dspy/lm.rb +92 -40
data/lib/dspy/module.rb +51 -6
data/lib/dspy/predict.rb +135 -15
data/lib/dspy/re_act.rb +366 -191
data/lib/dspy/schema_adapters.rb +55 -0
data/lib/dspy/signature.rb +282 -10
data/lib/dspy/subscribers/logger_subscriber.rb +197 -0
data/lib/dspy/tools/{sorbet_tool.rb → base.rb} +33 -33
data/lib/dspy/tools.rb +1 -1
data/lib/dspy.rb +19 -10
metadata +60 -28
data/lib/dspy/ext/dry_schema.rb +0 -94
data/lib/dspy/sorbet_chain_of_thought.rb +0 -91
data/lib/dspy/sorbet_module.rb +0 -47
data/lib/dspy/sorbet_predict.rb +0 -180
data/lib/dspy/sorbet_re_act.rb +0 -332
data/lib/dspy/sorbet_signature.rb +0 -218
data/lib/dspy/types.rb +0 -3

data/lib/dspy/re_act.rb CHANGED Viewed

@@ -1,253 +1,428 @@
+# typed: strict
+# frozen_string_literal: true
+require 'sorbet-runtime'
+require_relative 'predict'
+require_relative 'signature'
+require_relative 'chain_of_thought'
+require 'json'
+require_relative 'instrumentation'
 module DSPy
-  # Define the signature for ReAct reasoning
+  # Define a simple struct for history entries with proper type annotations
+  class HistoryEntry < T::Struct
+    const :step, Integer
+    prop :thought, T.nilable(String)
+    prop :action, T.nilable(String)
+    prop :action_input, T.nilable(T.any(String, Numeric, T::Hash[T.untyped, T.untyped], T::Array[T.untyped]))
+    prop :observation, T.nilable(String)
+    # Custom serialization to ensure compatibility with the rest of the code
+    def to_h
+      {
+        step: step,
+        thought: thought,
+        action: action,
+        action_input: action_input,
+        observation: observation
+      }.compact
+    end
+  end
+  # Defines the signature for ReAct reasoning using Sorbet signatures
   class Thought < DSPy::Signature
     description "Generate a thought about what to do next to answer the question."
     input do
-      required(:question).value(:string).meta(description: 'The question to answer')
-      required(:history).value(:array).meta(description: 'Previous thoughts and actions, including observations from tools. The agent MUST use information from the history to inform its actions and final answer. Each entry is a hash representing a step in the reasoning process.')
-      required(:available_tools).value(:string).meta(description: 'List of available tools and their descriptions. The agent MUST choose an action from this list or use "finish".')
+      const :question, String,
+        description: "The question to answer"
+      const :history, T::Array[HistoryEntry],
+        description: "Previous thoughts and actions, including observations from tools. The agent MUST use information from the history to inform its actions and final answer. Each entry is a hash representing a step in the reasoning process."
+      const :available_tools, T::Array[T::Hash[String, T.untyped]],
+        description: "Array of available tools with their JSON schemas. The agent MUST choose an action from the tool names in this list or use \"finish\". For each tool, use the name exactly as specified and provide action_input as a JSON object matching the tool's schema."
     end
     output do
-      required(:thought).value(:string).meta(description: 'Reasoning about what to do next, considering the history and observations.')
-      required(:action).value(:string).meta(description: 'The action to take. MUST be one of the tool names listed in `available_tools` input, or the literal string "finish" to provide the final answer.')
-      required(:action_input).value(:string).meta(description: 'Input for the chosen action. If action is "finish", this field MUST contain the final answer to the original question. This answer MUST be directly taken from the relevant Observation in the history if available. For example, if an observation showed "Observation: 100.0", and you are finishing, this field MUST be "100.0". Do not leave empty if finishing with an observed answer.')
+      const :thought, String,
+        description: "Reasoning about what to do next, considering the history and observations."
+      const :action, String,
+        description: "The action to take. MUST be one of the tool names listed in `available_tools` input, or the literal string \"finish\" to provide the final answer."
+      const :action_input, T.any(String, T::Hash[T.untyped, T.untyped]),
+        description: "Input for the chosen action. If action is a tool name, this MUST be a JSON object matching the tool's schema. If action is \"finish\", this field MUST contain the final answer to the original question. This answer MUST be directly taken from the relevant Observation in the history if available. For example, if an observation showed \"Observation: 100.0\", and you are finishing, this field MUST be \"100.0\". Do not leave empty if finishing with an observed answer."
     end
   end
-  # Define the signature for observing tool results
+  class NextStep < T::Enum
+    enums do
+      Continue = new("continue")
+      Finish = new("finish")
+    end
+  end
+  # Defines the signature for processing observations and deciding next steps
   class ReActObservation < DSPy::Signature
     description "Process the observation from a tool and decide what to do next."
     input do
-      required(:question).value(:string).meta(description: 'The original question')
-      required(:history).value(:array).meta(description: 'Previous thoughts, actions, and observations. Each entry is a hash representing a step in the reasoning process.')
-      required(:observation).value(:string).meta(description: 'The result from the last action')
+      const :question, String,
+        description: "The original question"
+      const :history, T::Array[HistoryEntry],
+        description: "Previous thoughts, actions, and observations. Each entry is a hash representing a step in the reasoning process."
+      const :observation, String,
+        description: "The result from the last action"
     end
     output do
-      required(:interpretation).value(:string).meta(description: 'Interpretation of the observation')
-      required(:next_step).value(:string).meta(description: 'What to do next: "continue" or "finish"')
+      const :interpretation, String,
+        description: "Interpretation of the observation"
+      const :next_step, NextStep,
+        description: "What to do next: '#{NextStep::Continue}' or '#{NextStep::Finish}'"
     end
   end
-  # ReAct Agent Module
-  class ReAct < DSPy::Module
-    attr_reader :signature_class, :internal_output_schema, :tools, :max_iterations
+  # ReAct Agent using Sorbet signatures
+  class ReAct < Predict
+    extend T::Sig
-    # Defines the structure for each entry in the ReAct history
-    HistoryEntry = Struct.new(:step, :thought, :action, :action_input, :observation, keyword_init: true) do
-      def to_h
-        {
-          step: step,
-          thought: thought,
-          action: action,
-          action_input: action_input,
-          observation: observation
-        }
-      end
-    end
+    FINISH_ACTION = "finish"
+    sig { returns(T.class_of(DSPy::Signature)) }
+    attr_reader :original_signature_class
+    sig { returns(T.class_of(T::Struct)) }
+    attr_reader :enhanced_output_struct
+    sig { returns(T::Hash[String, T.untyped]) }
+    attr_reader :tools
+    sig { returns(Integer) }
+    attr_reader :max_iterations
+    sig { params(signature_class: T.class_of(DSPy::Signature), tools: T::Array[T.untyped], max_iterations: Integer).void }
     def initialize(signature_class, tools: [], max_iterations: 5)
-      super()
-      @signature_class = signature_class # User's original signature class
-      @thought_generator = DSPy::ChainOfThought.new(Thought)
-      @observation_processor = DSPy::Predict.new(ReActObservation)
-      @tools = tools.map { |tool| [tool.name.downcase, tool] }.to_h # Ensure tool names are stored lowercased for lookup
+      @original_signature_class = signature_class
+      @tools = T.let({}, T::Hash[String, T.untyped])
+      tools.each { |tool| @tools[tool.name.downcase] = tool }
       @max_iterations = max_iterations
-      # Define the schema for fields automatically added by ReAct
-      react_added_output_schema = Dry::Schema.JSON do
-        optional(:history).array(:hash) do
-          required(:step).value(:integer)
-          optional(:thought).value(:string)
-          optional(:action).value(:string)
-          optional(:action_input).maybe(:string)
-          optional(:observation).maybe(:string)
+      # Create thought generator using Predict to preserve field descriptions
+      @thought_generator = T.let(DSPy::Predict.new(Thought), DSPy::Predict)
+      # Create observation processor using Predict to preserve field descriptions
+      @observation_processor = T.let(DSPy::Predict.new(ReActObservation), DSPy::Predict)
+      # Create enhanced output struct with ReAct fields
+      @enhanced_output_struct = create_enhanced_output_struct(signature_class)
+      enhanced_output_struct = @enhanced_output_struct
+      # Create enhanced signature class
+      enhanced_signature = Class.new(DSPy::Signature) do
+        # Set the description
+        description signature_class.description
+        # Use the same input struct
+        @input_struct_class = signature_class.input_struct_class
+        # Use the enhanced output struct with ReAct fields
+        @output_struct_class = enhanced_output_struct
+        class << self
+          attr_reader :input_struct_class, :output_struct_class
         end
-        optional(:iterations).value(:integer).meta(description: 'Number of iterations taken by the ReAct agent.')
       end
-      # Create the augmented internal output schema by combining user's output schema and ReAct's added fields
-      @internal_output_schema = Dry::Schema.JSON(parent: [signature_class.output_schema, react_added_output_schema])
+      # Call parent constructor with enhanced signature
+      super(enhanced_signature)
     end
-    def forward(**input_values)
-      # Validate input against the signature's input schema
-      input_validation_result = @signature_class.input_schema.call(input_values)
-      unless input_validation_result.success?
-        raise DSPy::PredictionInvalidError.new(input_validation_result.errors)
-      end
+    sig { params(kwargs: T.untyped).returns(T.untyped).override }
+    def forward(**kwargs)
+      lm = config.lm || DSPy.config.lm
+      # Prepare instrumentation payload
+      input_fields = kwargs.keys.map(&:to_s)
+      available_tools = @tools.keys
+      # Instrument the entire ReAct agent lifecycle
+      result = Instrumentation.instrument('dspy.react', {
+        signature_class: @original_signature_class.name,
+        model: lm.model,
+        provider: lm.provider,
+        input_fields: input_fields,
+        max_iterations: @max_iterations,
+        available_tools: available_tools
+      }) do
+        # Validate input using Sorbet struct validation
+        input_struct = @original_signature_class.input_struct_class.new(**kwargs)
+        # Get the question (assume first field is the question for now)
+        question = T.cast(input_struct.serialize.values.first, String)
+        history = T.let([], T::Array[HistoryEntry])
+        available_tools_desc = @tools.map { |name, tool| JSON.parse(tool.schema) }
+        final_answer = T.let(nil, T.nilable(String))
+        iterations_count = 0
+        last_observation = T.let(nil, T.nilable(String))
+        tools_used = []
+        while @max_iterations.nil? || iterations_count < @max_iterations
+          iterations_count += 1
+          # Instrument each iteration
+          iteration_result = Instrumentation.instrument('dspy.react.iteration', {
+            iteration: iterations_count,
+            max_iterations: @max_iterations,
+            history_length: history.length,
+            tools_used_so_far: tools_used.uniq
+          }) do
+            # Get next thought from LM
+            thought_obj = @thought_generator.forward(
+              question: question,
+              history: history,
+              available_tools: available_tools_desc
+            )
+            step = iterations_count
+            thought = thought_obj.thought
+            action = thought_obj.action
+            action_input = thought_obj.action_input
+            # Break if finish action
+            if action&.downcase == 'finish'
+              final_answer = handle_finish_action(action_input, last_observation, step, thought, action, history)
+              break
+            end
-      # Assume the first input field is the primary question for the ReAct loop
-      # This is a convention; a more robust solution might involve explicit mapping
-      # or requiring a specific field name like 'question'.
-      question_field_name = @signature_class.input_schema.key_map.first.name.to_sym
-      question = input_values[question_field_name]
+            # Track tools used
+            tools_used << action.downcase if action && @tools[action.downcase]
+            # Execute action
+            observation = if action && @tools[action.downcase]
+                            # Instrument tool call
+                            Instrumentation.instrument('dspy.react.tool_call', {
+                              iteration: iterations_count,
+                              tool_name: action.downcase,
+                              tool_input: action_input
+                            }) do
+                              execute_action(action, action_input)
+                            end
+                          else
+                            "Unknown action: #{action}. Available actions: #{@tools.keys.join(', ')}, finish"
+                          end
+            last_observation = observation
+            # Add to history
+            history << HistoryEntry.new(
+              step: step,
+              thought: thought,
+              action: action,
+              action_input: action_input,
+              observation: observation
+            )
+            # Process observation to decide next step
+            if observation && !observation.include?("Unknown action")
+              observation_result = @observation_processor.forward(
+                question: question,
+                history: history,
+                observation: observation
+              )
-      history = [] # Initialize history as an array of HistoryEntry objects
-      available_tools_desc = @tools.map { |name, tool| "- #{name}: #{tool.description}" }.join("\n")
+              # If observation processor suggests finishing, generate final thought
+              if observation_result.next_step == NextStep::Finish
+                final_thought = @thought_generator.forward(
+                  question: question,
+                  history: history,
+                  available_tools: available_tools_desc
+                )
-      final_answer = nil
-      iterations_count = 0
+                # Force finish action if observation processor suggests it
+                if final_thought.action&.downcase != 'finish'
+                  forced_answer = if observation_result.interpretation && !observation_result.interpretation.empty?
+                                    observation_result.interpretation
+                                  else
+                                    observation
+                                  end
+                  final_answer = handle_finish_action(forced_answer, last_observation, step + 1, final_thought.thought, 'finish', history)
+                else
+                  final_answer = handle_finish_action(final_thought.action_input, last_observation, step + 1, final_thought.thought, final_thought.action, history)
+                end
+                break
+              end
+            end
-      @max_iterations.times do |i|
-        iterations_count = i + 1
-        current_step_history = { step: iterations_count }
+            # Emit iteration complete event
+            Instrumentation.emit('dspy.react.iteration_complete', {
+              iteration: iterations_count,
+              thought: thought,
+              action: action,
+              action_input: action_input,
+              observation: observation,
+              tools_used: tools_used.uniq
+            })
+          end
-        # Generate thought and action
-        thought_result = @thought_generator.call(
-          question: question,
-          history: history.map(&:to_h),
-          available_tools: available_tools_desc
-        )
-        thought = thought_result.thought
-        action = thought_result.action
-        current_action_input = thought_result.action_input # What LM provided
-        current_step_history[:thought] = thought
-        current_step_history[:action] = action
-        if action.downcase == "finish"
-          # If LM says 'finish' but gives empty input, try to use last observation
-          if current_action_input.nil? || current_action_input.strip.empty?
-            # Try to find the last observation in history
-            last_entry_with_observation = history.reverse.find { |entry| entry.observation && !entry.observation.strip.empty? }
-            if last_entry_with_observation
-              last_observation_value = last_entry_with_observation.observation.strip
-              DSPy.logger.info(
-                module: "ReAct",
-                status: "Finish action had empty input. Overriding with last observation.",
-                original_input: current_action_input,
-                derived_input: last_observation_value
-              )
-              current_action_input = last_observation_value # Override
-            else
-              DSPy.logger.warn(module: "ReAct", status: "Finish action had empty input, no prior Observation found in history.", original_input: current_action_input)
-            end
+          # Check if max iterations reached
+          if iterations_count >= @max_iterations && final_answer.nil?
+            Instrumentation.emit('dspy.react.max_iterations', {
+              iteration_count: iterations_count,
+              max_iterations: @max_iterations,
+              tools_used: tools_used.uniq,
+              final_history_length: history.length
+            })
           end
-          final_answer = current_action_input # Set final answer from (potentially overridden) input
         end
-        # Add thought to history using current_action_input, which might have been overridden for 'finish'
-        current_step_history[:action_input] = current_action_input
+        # Create enhanced output with all ReAct data
+        output_field_name = @original_signature_class.output_struct_class.props.keys.first
+        output_data = kwargs.merge({
+          history: history.map(&:to_h),
+          iterations: iterations_count,
+          tools_used: tools_used.uniq
+        })
+        output_data[output_field_name] = final_answer || "No answer reached within #{@max_iterations} iterations"
+        enhanced_output = @enhanced_output_struct.new(**output_data)
-        # Check if we should finish (using the original action from LM)
-        if action.downcase == "finish"
-          DSPy.logger.info(module: "ReAct", status: "Finishing loop after thought", action: action, final_answer: final_answer, question: question)
-          history << HistoryEntry.new(**current_step_history) # Add final thought/action before breaking
-          break
-        end
+        enhanced_output
+      end
+      result
+    end
-        # Execute the action
-        observation_text = execute_action(action, current_action_input) # current_action_input is original for non-finish
-        current_step_history[:observation] = observation_text
-        history << HistoryEntry.new(**current_step_history) # Add completed step to history
+    private
-        # Process the observation
-        obs_result = @observation_processor.call(
-          question: question,
-          history: history.map(&:to_h),
-          observation: observation_text
-        )
-        if obs_result.next_step.downcase == "finish"
-          DSPy.logger.info(module: "ReAct", status: "Observation processor suggests finish. Generating final thought.", question: question, history_before_final_thought: history.map(&:to_h))
-          # Generate final thought/answer if observation processor decides to finish
-          # Create a new history entry for this final thought sequence
-          final_thought_step_history = { step: iterations_count + 1 } # This is like a sub-step or a new thought step
-          final_thought_result = @thought_generator.call(
-            question: question,
-            history: history.map(&:to_h), # history now includes the last observation
-            available_tools: available_tools_desc
-          )
-          DSPy.logger.info(module: "ReAct", status: "Finishing after observation and final thought", final_action: final_thought_result.action, final_action_input: final_thought_result.action_input, question: question)
-          final_thought_action = final_thought_result.action
-          final_thought_action_input_val = final_thought_result.action_input # LM provided
-          final_thought_step_history[:thought] = final_thought_result.thought
-          final_thought_step_history[:action] = final_thought_action
-          if final_thought_action.downcase == "finish"
-            if final_thought_action_input_val.nil? || final_thought_action_input_val.strip.empty?
-              # Find the last observation in the history array
-              last_entry_with_observation = history.reverse.find { |entry| entry.observation && !entry.observation.strip.empty? }
-              if last_entry_with_observation
-                last_observation_value_ft = last_entry_with_observation.observation.strip
-                DSPy.logger.info(
-                  module: "ReAct",
-                  status: "Final thought 'finish' action had empty input. Overriding with last observation.",
-                  original_input: final_thought_action_input_val,
-                  derived_input: last_observation_value_ft
-                )
-                final_thought_action_input_val = last_observation_value_ft # Override
-              else
-                DSPy.logger.warn(module: "ReAct", status: "Final thought 'finish' action had empty input, last observation also empty/not found cleanly.", original_input: final_thought_action_input_val)
-              end
-            else
-              # This case is if LM provides 'finish' but no observation to fall back on in history array (should be rare if history is populated correctly)
-              DSPy.logger.warn(module: "ReAct", status: "Final thought 'finish' action had empty input, no prior Observation found in history array.", original_input: final_thought_action_input_val) if (history.empty? || !history.any? { |entry| entry.observation && !entry.observation.strip.empty? })
-            end
+    sig { params(signature_class: T.class_of(DSPy::Signature)).returns(T.class_of(T::Struct)) }
+    def create_enhanced_output_struct(signature_class)
+      # Get original input and output props
+      input_props = signature_class.input_struct_class.props
+      output_props = signature_class.output_struct_class.props
+      # Create new struct class with input, output, and ReAct fields
+      Class.new(T::Struct) do
+        # Add all input fields
+        input_props.each do |name, prop|
+          # Extract the type and other options
+          type = prop[:type]
+          options = prop.except(:type, :type_object, :accessor_key, :sensitivity, :redaction)
+          # Handle default values
+          if options[:default]
+            const name, type, default: options[:default]
+          elsif options[:factory]
+            const name, type, factory: options[:factory]
+          else
+            const name, type
           end
+        end
-          final_thought_step_history[:action_input] = final_thought_action_input_val
-          history << HistoryEntry.new(**final_thought_step_history) # Add this final step to history
-          final_answer = final_thought_action_input_val # Use (potentially overridden) value
-          iterations_count += 1 # Account for this extra thought step in iterations
-          break
+        # Add all output fields
+        output_props.each do |name, prop|
+          # Extract the type and other options
+          type = prop[:type]
+          options = prop.except(:type, :type_object, :accessor_key, :sensitivity, :redaction)
+          # Handle default values
+          if options[:default]
+            const name, type, default: options[:default]
+          elsif options[:factory]
+            const name, type, factory: options[:factory]
+          else
+            const name, type
+          end
         end
+        # Add ReAct-specific fields
+        prop :history, T::Array[T::Hash[Symbol, T.untyped]]
+        prop :iterations, Integer
+        prop :tools_used, T::Array[String]
       end
+    end
-      final_answer ||= "Unable to find answer within #{@max_iterations} iterations"
-      DSPy.logger.info(module: "ReAct", status: "Final answer determined", final_answer: final_answer, question: question) if final_answer.nil? || final_answer.empty? || final_answer == "Unable to find answer within #{@max_iterations} iterations"
+    sig { params(action: String, action_input: T.untyped).returns(String) }
+    def execute_action(action, action_input)
+      tool_name = action.downcase
+      tool = @tools[tool_name]
+      return "Tool '#{action}' not found. Available tools: #{@tools.keys.join(', ')}" unless tool
-      # Prepare output data
-      output_data = {}
+      begin
+        result = if action_input.nil? ||
+                   (action_input.is_a?(String) && action_input.strip.empty?)
+          # No input provided
+          tool.dynamic_call({})
+        else
+          # Pass the action_input directly to dynamic_call, which can handle
+          # either a Hash or a JSON string
+          tool.dynamic_call(action_input)
+        end
+        result.to_s
+      rescue => e
+        "Error executing tool '#{action}': #{e.message}"
+      end
+    end
+    sig { params(output: T.untyped).void }
+    def validate_output_schema!(output)
+      # Validate that output is an instance of the enhanced output struct
+      unless output.is_a?(@enhanced_output_struct)
+        raise "Output must be an instance of #{@enhanced_output_struct}, got #{output.class}"
+      end
-      # Populate the primary answer field from the user's original signature
-      # This assumes the first defined output field in the user's signature is the main answer field.
-      user_primary_output_field = @signature_class.output_schema.key_map.first.name.to_sym
-      output_data[user_primary_output_field] = final_answer
+      # Validate original signature output fields are present
+      @original_signature_class.output_struct_class.props.each do |field_name, _prop|
+        unless output.respond_to?(field_name)
+          raise "Missing required field: #{field_name}"
+        end
+      end
-      # Add ReAct-specific fields
-      output_data[:history] = history.map(&:to_h) # Convert HistoryEntry objects to hashes for schema validation
-      output_data[:iterations] = iterations_count
+      # Validate ReAct-specific fields
+      unless output.respond_to?(:history) && output.history.is_a?(Array)
+        raise "Missing or invalid history field"
+      end
-      # Validate and create PORO using the augmented internal_output_schema
-      output_validation_result = @internal_output_schema.call(output_data)
-      unless output_validation_result.success?
-        DSPy.logger.error(module: "ReAct", status: "Internal output validation failed", errors: output_validation_result.errors.to_h, data: output_data)
-        raise DSPy::PredictionInvalidError.new(output_validation_result.errors)
+      unless output.respond_to?(:iterations) && output.iterations.is_a?(Integer)
+        raise "Missing or invalid iterations field"
       end
-      # Create PORO with all fields (user's + ReAct's)
-      # Sorting keys for Data.define ensures a consistent order for the PORO attributes.
-      poro_class = Data.define(*output_validation_result.to_h.keys.sort)
-      poro_class.new(**output_validation_result.to_h)
+      unless output.respond_to?(:tools_used) && output.tools_used.is_a?(Array)
+        raise "Missing or invalid tools_used field"
+      end
     end
-    private
+    sig { override.returns(T::Hash[Symbol, T.untyped]) }
+    def generate_example_output
+      example = super
+      example[:history] = [
+        {
+          step: 1,
+          thought: "I need to think about this question...",
+          action: "some_tool",
+          action_input: "input for tool",
+          observation: "result from tool"
+        }
+      ]
+      example[:iterations] = 1
+      example[:tools_used] = ["some_tool"]
+      example
+    end
-    def execute_action(action, action_input)
-      tool = @tools[action.downcase] # Lookup with downcased action name
+    sig { params(action_input: T.untyped, last_observation: T.nilable(String), step: Integer, thought: String, action: String, history: T::Array[HistoryEntry]).returns(String) }
+    def handle_finish_action(action_input, last_observation, step, thought, action, history)
+      final_answer = action_input.to_s
-      if tool.nil?
-        return "Error: Unknown tool '#{action}'. Available tools: #{@tools.keys.join(', ')}"
+      # If final_answer is empty but we have a last observation, use it
+      if (final_answer.nil? || final_answer.empty?) && last_observation
+        final_answer = last_observation
       end
-      begin
-        tool.call(action_input)
-      rescue => e
-        "Error executing #{action}: #{e.message}"
-      end
+      # Always add the finish action to history
+      history << HistoryEntry.new(
+        step: step,
+        thought: thought,
+        action: action,
+        action_input: final_answer,
+        observation: nil  # No observation for finish action
+      )
+      final_answer
     end
   end
 end