RubyGems - dspy - Versions diffs - 0.27.6 → 0.28.0 - Mend

dspy 0.27.6 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

checksums.yaml +4 -4
data/README.md +28 -9
data/lib/dspy/lm/adapter_factory.rb +1 -1
data/lib/dspy/lm/adapters/anthropic_adapter.rb +3 -2
data/lib/dspy/lm/chat_strategy.rb +38 -0
data/lib/dspy/lm/json_strategy.rb +222 -0
data/lib/dspy/lm.rb +13 -16
data/lib/dspy/re_act.rb +253 -68
data/lib/dspy/tools/base.rb +5 -7
data/lib/dspy/version.rb +1 -1
data/lib/dspy.rb +0 -8
metadata +4 -12
data/lib/dspy/lm/retry_handler.rb +0 -132
data/lib/dspy/lm/strategies/anthropic_extraction_strategy.rb +0 -78
data/lib/dspy/lm/strategies/anthropic_tool_use_strategy.rb +0 -132
data/lib/dspy/lm/strategies/base_strategy.rb +0 -53
data/lib/dspy/lm/strategies/enhanced_prompting_strategy.rb +0 -178
data/lib/dspy/lm/strategies/gemini_structured_output_strategy.rb +0 -80
data/lib/dspy/lm/strategies/openai_structured_output_strategy.rb +0 -65
data/lib/dspy/lm/strategy_selector.rb +0 -144
data/lib/dspy/lm/structured_output_strategy.rb +0 -17
data/lib/dspy/strategy.rb +0 -18

data/lib/dspy/lm/retry_handler.rb DELETED Viewed

@@ -1,132 +0,0 @@
-# frozen_string_literal: true
-require "sorbet-runtime"
-require "async"
-module DSPy
-  class LM
-    # Handles retry logic with progressive fallback strategies
-    class RetryHandler
-      extend T::Sig
-      MAX_RETRIES = 3
-      BACKOFF_BASE = 0.5 # seconds
-      sig { params(adapter: DSPy::LM::Adapter, signature_class: T.class_of(DSPy::Signature)).void }
-      def initialize(adapter, signature_class)
-        @adapter = adapter
-        @signature_class = signature_class
-        @attempt = 0
-      end
-      # Execute a block with retry logic and progressive fallback
-      sig do
-        type_parameters(:T)
-          .params(
-            initial_strategy: Strategies::BaseStrategy,
-            block: T.proc.params(strategy: Strategies::BaseStrategy).returns(T.type_parameter(:T))
-          )
-          .returns(T.type_parameter(:T))
-      end
-      def with_retry(initial_strategy, &block)
-        # Skip retries entirely if disabled
-        unless DSPy.config.structured_outputs.retry_enabled
-          return yield(initial_strategy)
-        end
-        strategies = build_fallback_chain(initial_strategy)
-        last_error = nil
-        strategies.each do |strategy|
-          retry_count = 0
-          begin
-            @attempt += 1
-            DSPy.logger.debug("Attempting with strategy: #{strategy.name} (attempt #{@attempt})")
-            result = yield(strategy)
-            # Success! Reset attempt counter for next time
-            @attempt = 0
-            return result
-          rescue JSON::ParserError, StandardError => e
-            last_error = e
-            # Let strategy handle the error first
-            if strategy.handle_error(e)
-              DSPy.logger.debug("Strategy #{strategy.name} handled error, trying next strategy")
-              next # Try next strategy
-            end
-            # Try retrying with the same strategy
-            if retry_count < max_retries_for_strategy(strategy)
-              retry_count += 1
-              backoff_time = calculate_backoff(retry_count)
-              # Use debug for structured output strategies since they often have expected failures
-              log_level = ["openai_structured_output", "gemini_structured_output"].include?(strategy.name) ? :debug : :warn
-              if log_level == :debug
-                DSPy.logger.debug(
-                  "Retrying #{strategy.name} after error (attempt #{retry_count}/#{max_retries_for_strategy(strategy)}): #{e.message}"
-                )
-              else
-                DSPy.logger.warn(
-                  "Retrying #{strategy.name} after error (attempt #{retry_count}/#{max_retries_for_strategy(strategy)}): #{e.message}"
-                )
-              end
-              Async::Task.current.sleep(backoff_time) if backoff_time > 0
-              retry
-            else
-              DSPy.logger.info("Max retries reached for #{strategy.name}, trying next strategy")
-              next # Try next strategy
-            end
-          end
-        end
-        # All strategies exhausted
-        DSPy.logger.error("All strategies exhausted after #{@attempt} total attempts")
-        raise last_error || StandardError.new("All JSON extraction strategies failed")
-      end
-      private
-      # Build a chain of strategies to try in order
-      sig { params(initial_strategy: Strategies::BaseStrategy).returns(T::Array[Strategies::BaseStrategy]) }
-      def build_fallback_chain(initial_strategy)
-        selector = StrategySelector.new(@adapter, @signature_class)
-        all_strategies = selector.available_strategies.sort_by(&:priority).reverse
-        # Start with the requested strategy, then try others
-        chain = [initial_strategy]
-        chain.concat(all_strategies.reject { |s| s.name == initial_strategy.name })
-        chain
-      end
-      # Different strategies get different retry counts
-      sig { params(strategy: Strategies::BaseStrategy).returns(Integer) }
-      def max_retries_for_strategy(strategy)
-        case strategy.name
-        when "openai_structured_output", "gemini_structured_output"
-          1 # Structured outputs rarely benefit from retries, most errors are permanent
-        when "anthropic_extraction"
-          2 # Anthropic can be a bit more variable
-        else
-          MAX_RETRIES # Enhanced prompting might need more attempts
-        end
-      end
-      # Calculate exponential backoff with jitter
-      sig { params(attempt: Integer).returns(Float) }
-      def calculate_backoff(attempt)
-        base_delay = BACKOFF_BASE * (2 ** (attempt - 1))
-        jitter = rand * 0.1 * base_delay
-        [base_delay + jitter, 10.0].min # Cap at 10 seconds
-      end
-    end
-  end
-end

data/lib/dspy/lm/strategies/anthropic_extraction_strategy.rb DELETED Viewed

@@ -1,78 +0,0 @@
-# frozen_string_literal: true
-require_relative "base_strategy"
-module DSPy
-  class LM
-    module Strategies
-      # Strategy for using Anthropic's enhanced JSON extraction patterns
-      class AnthropicExtractionStrategy < BaseStrategy
-        extend T::Sig
-        sig { override.returns(T::Boolean) }
-        def available?
-          adapter.is_a?(DSPy::LM::AnthropicAdapter)
-        end
-        sig { override.returns(Integer) }
-        def priority
-          90 # High priority - Anthropic's extraction is very reliable
-        end
-        sig { override.returns(String) }
-        def name
-          "anthropic_extraction"
-        end
-        sig { override.params(messages: T::Array[T::Hash[Symbol, String]], request_params: T::Hash[Symbol, T.untyped]).void }
-        def prepare_request(messages, request_params)
-          # Anthropic adapter already handles JSON optimization in prepare_messages_for_json
-          # No additional preparation needed here
-        end
-        sig { override.params(response: DSPy::LM::Response).returns(T.nilable(String)) }
-        def extract_json(response)
-          # Use Anthropic's specialized extraction method if available
-          if adapter.respond_to?(:extract_json_from_response)
-            adapter.extract_json_from_response(response.content)
-          else
-            # Fallback to basic extraction
-            extract_json_fallback(response.content)
-          end
-        end
-        private
-        sig { params(content: T.nilable(String)).returns(T.nilable(String)) }
-        def extract_json_fallback(content)
-          return nil if content.nil?
-          # Try the 4 patterns Anthropic adapter uses
-          # Pattern 1: ```json blocks
-          if content.include?('```json')
-            return content.split('```json').last.split('```').first.strip
-          end
-          # Pattern 2: ## Output values header
-          if content.include?('## Output values')
-            json_part = content.split('## Output values').last
-            if json_part.include?('```')
-              return json_part.split('```')[1].strip
-            end
-          end
-          # Pattern 3: Generic code blocks
-          if content.include?('```')
-            code_block = content.split('```')[1]
-            if code_block && (code_block.strip.start_with?('{') || code_block.strip.start_with?('['))
-              return code_block.strip
-            end
-          end
-          # Pattern 4: Already valid JSON
-          content.strip if content.strip.start_with?('{') || content.strip.start_with?('[')
-        end
-      end
-    end
-  end
-end

data/lib/dspy/lm/strategies/anthropic_tool_use_strategy.rb DELETED Viewed

@@ -1,132 +0,0 @@
-# frozen_string_literal: true
-require "sorbet-runtime"
-module DSPy
-  class LM
-    module Strategies
-      # Strategy for using Anthropic's tool use feature for guaranteed JSON output
-      class AnthropicToolUseStrategy < BaseStrategy
-        extend T::Sig
-        sig { override.returns(T::Boolean) }
-        def available?
-          # Only available for Anthropic adapters with models that support tool use
-          adapter.is_a?(DSPy::LM::AnthropicAdapter) && supports_tool_use?
-        end
-        sig { override.returns(Integer) }
-        def priority
-          95 # Higher priority than extraction strategy - tool use is more reliable
-        end
-        sig { override.returns(String) }
-        def name
-          "anthropic_tool_use"
-        end
-        sig { override.params(messages: T::Array[T::Hash[Symbol, String]], request_params: T::Hash[Symbol, T.untyped]).void }
-        def prepare_request(messages, request_params)
-          # Convert signature output schema to Anthropic tool format
-          tool_schema = convert_to_tool_schema
-          # Add the tool definition to request params
-          request_params[:tools] = [tool_schema]
-          # Force the model to use our tool
-          request_params[:tool_choice] = {
-            type: "tool",
-            name: "json_output"
-          }
-          # Update the last user message to request tool use
-          if messages.any? && messages.last[:role] == "user"
-            messages.last[:content] += "\n\nPlease use the json_output tool to provide your response."
-          end
-        end
-        sig { override.params(response: DSPy::LM::Response).returns(T.nilable(String)) }
-        def extract_json(response)
-          # Extract JSON from tool use response
-          begin
-            # Check for tool calls in metadata first (this is the primary method)
-            if response.metadata.respond_to?(:tool_calls) && response.metadata.tool_calls
-              tool_calls = response.metadata.tool_calls
-              if tool_calls.is_a?(Array) && !tool_calls.empty?
-                first_call = tool_calls.first
-                if first_call[:name] == "json_output" && first_call[:input]
-                  json_result = JSON.generate(first_call[:input])
-                  return json_result
-                end
-              end
-            end
-            # Fallback: try to extract from content if it contains tool use blocks
-            content = response.content
-            if content && !content.empty? && content.include?("<tool_use>")
-              tool_content = content[/<tool_use>.*?<\/tool_use>/m]
-              if tool_content
-                json_match = tool_content[/<input>(.*?)<\/input>/m, 1]
-                return json_match.strip if json_match
-              end
-            end
-            nil
-          rescue => e
-            DSPy.logger.debug("Failed to extract tool use JSON: #{e.message}")
-            nil
-          end
-        end
-        sig { override.params(error: StandardError).returns(T::Boolean) }
-        def handle_error(error)
-          # Tool use errors should trigger fallback to extraction strategy
-          if error.message.include?("tool") || error.message.include?("invalid_request_error")
-            DSPy.logger.warn("Anthropic tool use failed: #{error.message}")
-            true # We handled it, try next strategy
-          else
-            false # Let retry handler deal with it
-          end
-        end
-        private
-        sig { returns(T::Boolean) }
-        def supports_tool_use?
-          # Check if model supports tool use
-          # Claude 3 models (Opus, Sonnet, Haiku) support tool use
-          model = adapter.model.downcase
-          model.include?("claude-3") || model.include?("claude-3.5")
-        end
-        sig { returns(T::Hash[Symbol, T.untyped]) }
-        def convert_to_tool_schema
-          # Get output fields from signature
-          output_fields = signature_class.output_field_descriptors
-          # Convert to Anthropic tool format
-          {
-            name: "json_output",
-            description: "Output the result in the required JSON format",
-            input_schema: {
-              type: "object",
-              properties: build_properties_from_fields(output_fields),
-              required: output_fields.keys.map(&:to_s)
-            }
-          }
-        end
-        sig { params(fields: T::Hash[Symbol, T.untyped]).returns(T::Hash[String, T.untyped]) }
-        def build_properties_from_fields(fields)
-          properties = {}
-          fields.each do |field_name, descriptor|
-            properties[field_name.to_s] = DSPy::TypeSystem::SorbetJsonSchema.type_to_json_schema(descriptor.type)
-          end
-          properties
-        end
-      end
-    end
-  end
-end

data/lib/dspy/lm/strategies/base_strategy.rb DELETED Viewed

@@ -1,53 +0,0 @@
-# frozen_string_literal: true
-require "sorbet-runtime"
-module DSPy
-  class LM
-    module Strategies
-      # Base class for JSON extraction strategies
-      class BaseStrategy
-        extend T::Sig
-        extend T::Helpers
-        abstract!
-        sig { params(adapter: DSPy::LM::Adapter, signature_class: T.class_of(DSPy::Signature)).void }
-        def initialize(adapter, signature_class)
-          @adapter = adapter
-          @signature_class = signature_class
-        end
-        # Check if this strategy is available for the given adapter/model
-        sig { abstract.returns(T::Boolean) }
-        def available?; end
-        # Priority for this strategy (higher = preferred)
-        sig { abstract.returns(Integer) }
-        def priority; end
-        # Name of the strategy for logging/debugging
-        sig { abstract.returns(String) }
-        def name; end
-        # Prepare the request for JSON extraction
-        sig { abstract.params(messages: T::Array[T::Hash[Symbol, String]], request_params: T::Hash[Symbol, T.untyped]).void }
-        def prepare_request(messages, request_params); end
-        # Extract JSON from the response
-        sig { abstract.params(response: DSPy::LM::Response).returns(T.nilable(String)) }
-        def extract_json(response); end
-        # Handle errors specific to this strategy
-        sig { params(error: StandardError).returns(T::Boolean) }
-        def handle_error(error)
-          # By default, don't handle errors - let them propagate
-          false
-        end
-        protected
-        attr_reader :adapter, :signature_class
-      end
-    end
-  end
-end

data/lib/dspy/lm/strategies/enhanced_prompting_strategy.rb DELETED Viewed

@@ -1,178 +0,0 @@
-# frozen_string_literal: true
-require_relative "base_strategy"
-module DSPy
-  class LM
-    module Strategies
-      # Enhanced prompting strategy that works with any LLM
-      # Adds explicit JSON formatting instructions to improve reliability
-      class EnhancedPromptingStrategy < BaseStrategy
-        extend T::Sig
-        sig { override.returns(T::Boolean) }
-        def available?
-          # This strategy is always available as a fallback
-          true
-        end
-        sig { override.returns(Integer) }
-        def priority
-          50 # Medium priority - use when native methods aren't available
-        end
-        sig { override.returns(String) }
-        def name
-          "enhanced_prompting"
-        end
-        sig { override.params(messages: T::Array[T::Hash[Symbol, String]], request_params: T::Hash[Symbol, T.untyped]).void }
-        def prepare_request(messages, request_params)
-          # Enhance the user message with explicit JSON instructions
-          return if messages.empty?
-          # Get the output schema
-          output_schema = signature_class.output_json_schema
-          # Find the last user message
-          last_user_idx = messages.rindex { |msg| msg[:role] == "user" }
-          return unless last_user_idx
-          # Add JSON formatting instructions
-          original_content = messages[last_user_idx][:content]
-          enhanced_content = enhance_prompt_with_json_instructions(original_content, output_schema)
-          messages[last_user_idx][:content] = enhanced_content
-          # Add system instructions if no system message exists
-          if messages.none? { |msg| msg[:role] == "system" }
-            messages.unshift({
-              role: "system",
-              content: "You are a helpful assistant that always responds with valid JSON when requested."
-            })
-          end
-        end
-        sig { override.params(response: DSPy::LM::Response).returns(T.nilable(String)) }
-        def extract_json(response)
-          return nil if response.content.nil?
-          content = response.content.strip
-          # Try multiple extraction patterns
-          # 1. Check for markdown code blocks
-          if content.include?('```json')
-            json_content = content.split('```json').last.split('```').first.strip
-            return json_content if valid_json?(json_content)
-          elsif content.include?('```')
-            code_block = content.split('```')[1]
-            if code_block
-              json_content = code_block.strip
-              return json_content if valid_json?(json_content)
-            end
-          end
-          # 2. Check if the entire response is JSON
-          return content if valid_json?(content)
-          # 3. Look for JSON-like structures in the content
-          json_match = content.match(/\{[\s\S]*\}|\[[\s\S]*\]/)
-          if json_match
-            json_content = json_match[0]
-            return json_content if valid_json?(json_content)
-          end
-          nil
-        end
-        private
-        sig { params(prompt: String, schema: T::Hash[Symbol, T.untyped]).returns(String) }
-        def enhance_prompt_with_json_instructions(prompt, schema)
-          json_example = generate_example_from_schema(schema)
-          <<~ENHANCED
-            #{prompt}
-            IMPORTANT: You must respond with valid JSON that matches this structure:
-            ```json
-            #{JSON.pretty_generate(json_example)}
-            ```
-            Required fields: #{schema[:required]&.join(', ') || 'none'}
-            Ensure your response:
-            1. Is valid JSON (properly quoted strings, no trailing commas)
-            2. Includes all required fields
-            3. Uses the correct data types for each field
-            4. Is wrapped in ```json``` markdown code blocks
-          ENHANCED
-        end
-        sig { params(schema: T::Hash[Symbol, T.untyped]).returns(T::Hash[String, T.untyped]) }
-        def generate_example_from_schema(schema)
-          return {} unless schema[:properties]
-          example = {}
-          schema[:properties].each do |field_name, field_schema|
-            example[field_name.to_s] = generate_example_value(field_schema)
-          end
-          example
-        end
-        sig { params(field_schema: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
-        def generate_example_value(field_schema)
-          case field_schema[:type]
-          when "string"
-            field_schema[:description] || "example string"
-          when "integer"
-            42
-          when "number"
-            3.14
-          when "boolean"
-            true
-          when "array"
-            if field_schema[:items]
-              [generate_example_value(field_schema[:items])]
-            else
-              ["example item"]
-            end
-          when "object"
-            if field_schema[:properties]
-              # Generate proper nested object example
-              nested_example = {}
-              field_schema[:properties].each do |prop_name, prop_schema|
-                nested_example[prop_name.to_s] = generate_example_value(prop_schema)
-              end
-              nested_example
-            else
-              { "nested" => "object" }
-            end
-          when Array
-            # Handle union types like ["object", "null"]
-            if field_schema[:type].include?("object") && field_schema[:properties]
-              nested_example = {}
-              field_schema[:properties].each do |prop_name, prop_schema|
-                nested_example[prop_name.to_s] = generate_example_value(prop_schema)
-              end
-              nested_example
-            elsif field_schema[:type].include?("string")
-              "example string"
-            else
-              "example value"
-            end
-          else
-            "example value"
-          end
-        end
-        sig { params(content: String).returns(T::Boolean) }
-        def valid_json?(content)
-          JSON.parse(content)
-          true
-        rescue JSON::ParserError
-          false
-        end
-      end
-    end
-  end
-end

data/lib/dspy/lm/strategies/gemini_structured_output_strategy.rb DELETED Viewed

@@ -1,80 +0,0 @@
-# frozen_string_literal: true
-require_relative "base_strategy"
-require_relative "../adapters/gemini/schema_converter"
-module DSPy
-  class LM
-    module Strategies
-      # Strategy for using Gemini's native structured output feature
-      class GeminiStructuredOutputStrategy < BaseStrategy
-        extend T::Sig
-        sig { override.returns(T::Boolean) }
-        def available?
-          # Check if adapter is Gemini and supports structured outputs
-          return false unless adapter.is_a?(DSPy::LM::GeminiAdapter)
-          return false unless adapter.instance_variable_get(:@structured_outputs_enabled)
-          DSPy::LM::Adapters::Gemini::SchemaConverter.supports_structured_outputs?(adapter.model)
-        end
-        sig { override.returns(Integer) }
-        def priority
-          100 # Highest priority - native structured outputs are most reliable
-        end
-        sig { override.returns(String) }
-        def name
-          "gemini_structured_output"
-        end
-        sig { override.params(messages: T::Array[T::Hash[Symbol, String]], request_params: T::Hash[Symbol, T.untyped]).void }
-        def prepare_request(messages, request_params)
-          # Convert signature to Gemini JSON Schema format (supports oneOf/anyOf for unions)
-          schema = DSPy::LM::Adapters::Gemini::SchemaConverter.to_gemini_format(signature_class)
-          # Add generation_config for structured output using JSON Schema format
-          request_params[:generation_config] = {
-            response_mime_type: "application/json",
-            response_json_schema: schema  # Use JSON Schema format for proper union support
-          }
-        end
-        sig { override.params(response: DSPy::LM::Response).returns(T.nilable(String)) }
-        def extract_json(response)
-          # With Gemini structured outputs, the response should already be valid JSON
-          # Just return the content as-is
-          response.content
-        end
-        sig { override.params(error: StandardError).returns(T::Boolean) }
-        def handle_error(error)
-          # Handle Gemini-specific structured output errors
-          error_msg = error.message.to_s.downcase
-          # Check for permanent errors that shouldn't be retried
-          permanent_error_patterns = [
-            "schema",
-            "generation_config",
-            "response_schema",
-            "unknown name \"response_mime_type\"",
-            "unknown name \"response_schema\"",
-            "invalid json payload",
-            "no matching sse interaction found",  # VCR test configuration issue
-            "cannot find field"
-          ]
-          if permanent_error_patterns.any? { |pattern| error_msg.include?(pattern) }
-            # These are permanent errors - no point retrying
-            DSPy.logger.debug("Gemini structured output failed (permanent error, skipping retries): #{error.message}")
-            true # Skip retries and try next strategy
-          else
-            # Unknown error - let retry logic handle it
-            false
-          end
-        end
-      end
-    end
-  end
-end