RubyGems - dspy - Versions diffs - 0.30.0 → 0.31.0 - Mend

dspy 0.30.0 → 0.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

checksums.yaml +4 -4
data/README.md +68 -37
data/lib/dspy/callbacks.rb +21 -2
data/lib/dspy/context.rb +52 -1
data/lib/dspy/evals.rb +21 -2
data/lib/dspy/lm/adapter_factory.rb +40 -17
data/lib/dspy/lm/errors.rb +3 -0
data/lib/dspy/lm/json_strategy.rb +24 -8
data/lib/dspy/lm.rb +62 -19
data/lib/dspy/module.rb +213 -17
data/lib/dspy/prompt.rb +94 -36
data/lib/dspy/re_act.rb +50 -17
data/lib/dspy/schema/sorbet_json_schema.rb +5 -2
data/lib/dspy/schema/sorbet_toon_adapter.rb +80 -0
data/lib/dspy/structured_outputs_prompt.rb +5 -3
data/lib/dspy/type_serializer.rb +2 -1
data/lib/dspy/version.rb +1 -1
data/lib/dspy.rb +6 -0
metadata +14 -51
data/lib/dspy/lm/adapters/anthropic_adapter.rb +0 -291
data/lib/dspy/lm/adapters/gemini/schema_converter.rb +0 -186
data/lib/dspy/lm/adapters/gemini_adapter.rb +0 -220
data/lib/dspy/lm/adapters/ollama_adapter.rb +0 -73
data/lib/dspy/lm/adapters/openai/schema_converter.rb +0 -359
data/lib/dspy/lm/adapters/openai_adapter.rb +0 -188
data/lib/dspy/lm/adapters/openrouter_adapter.rb +0 -68

data/lib/dspy/lm/adapters/gemini/schema_converter.rb DELETED Viewed

@@ -1,186 +0,0 @@
-# frozen_string_literal: true
-require "sorbet-runtime"
-module DSPy
-  class LM
-    module Adapters
-      module Gemini
-        # Converts DSPy signatures to Gemini structured output format
-        class SchemaConverter
-          extend T::Sig
-          # Models that support structured outputs (JSON + Schema)
-          # Based on official Google documentation: https://ai.google.dev/gemini-api/docs/models/gemini
-          # Last updated: Oct 2025
-          # Note: Gemini 1.5 series deprecated Oct 2025
-          STRUCTURED_OUTPUT_MODELS = T.let([
-            # Gemini 2.0 series
-            "gemini-2.0-flash",
-            "gemini-2.0-flash-lite",
-            # Gemini 2.5 series (current)
-            "gemini-2.5-pro",
-            "gemini-2.5-flash",
-            "gemini-2.5-flash-lite",
-            "gemini-2.5-flash-image"
-          ].freeze, T::Array[String])
-          # Models that do not support structured outputs or are deprecated
-          UNSUPPORTED_MODELS = T.let([
-            # Legacy Gemini 1.0 series
-            "gemini-pro",
-            "gemini-1.0-pro-002",
-            "gemini-1.0-pro",
-            # Deprecated Gemini 1.5 series (removed Oct 2025)
-            "gemini-1.5-pro",
-            "gemini-1.5-pro-preview-0514",
-            "gemini-1.5-pro-preview-0409",
-            "gemini-1.5-flash",
-            "gemini-1.5-flash-8b"
-          ].freeze, T::Array[String])
-          sig { params(signature_class: T.class_of(DSPy::Signature)).returns(T::Hash[Symbol, T.untyped]) }
-          def self.to_gemini_format(signature_class)
-            # Get the output JSON schema from the signature class
-            output_schema = signature_class.output_json_schema
-            # Convert to Gemini format (OpenAPI 3.0 Schema subset - not related to OpenAI)
-            convert_dspy_schema_to_gemini(output_schema)
-          end
-          sig { params(model: String).returns(T::Boolean) }
-          def self.supports_structured_outputs?(model)
-            # Extract base model name without provider prefix
-            base_model = model.sub(/^gemini\//, "")
-            # Check if it's a supported model or a newer version
-            STRUCTURED_OUTPUT_MODELS.any? { |supported| base_model.start_with?(supported) }
-          end
-          sig { params(schema: T::Hash[Symbol, T.untyped]).returns(T::Array[String]) }
-          def self.validate_compatibility(schema)
-            issues = []
-            # Check for deeply nested objects (Gemini has depth limits)
-            depth = calculate_depth(schema)
-            if depth > 5
-              issues << "Schema depth (#{depth}) exceeds recommended limit of 5 levels"
-            end
-            issues
-          end
-          private
-          sig { params(dspy_schema: T::Hash[Symbol, T.untyped]).returns(T::Hash[Symbol, T.untyped]) }
-          def self.convert_dspy_schema_to_gemini(dspy_schema)
-            # For Gemini's responseJsonSchema, we need pure JSON Schema format
-            # Remove OpenAPI-specific fields like "$schema"
-            result = {
-              type: "object",
-              properties: {},
-              required: []
-            }
-            # Convert properties
-            properties = dspy_schema[:properties] || {}
-            properties.each do |prop_name, prop_schema|
-              result[:properties][prop_name] = convert_property_to_gemini(prop_schema)
-            end
-            # Set required fields
-            result[:required] = (dspy_schema[:required] || []).map(&:to_s)
-            result
-          end
-          sig { params(property_schema: T::Hash[Symbol, T.untyped]).returns(T::Hash[Symbol, T.untyped]) }
-          def self.convert_property_to_gemini(property_schema)
-            # Handle oneOf/anyOf schemas (union types) - Gemini supports these in responseJsonSchema
-            if property_schema[:oneOf]
-              return {
-                oneOf: property_schema[:oneOf].map { |schema| convert_property_to_gemini(schema) },
-                description: property_schema[:description]
-              }.compact
-            end
-            if property_schema[:anyOf]
-              return {
-                anyOf: property_schema[:anyOf].map { |schema| convert_property_to_gemini(schema) },
-                description: property_schema[:description]
-              }.compact
-            end
-            case property_schema[:type]
-            when "string"
-              result = { type: "string" }
-              # Gemini responseJsonSchema doesn't support const, so convert to single-value enum
-              # See: https://ai.google.dev/api/generate-content#FIELDS.response_json_schema
-              if property_schema[:const]
-                result[:enum] = [property_schema[:const]]
-              elsif property_schema[:enum]
-                result[:enum] = property_schema[:enum]
-              end
-              result
-            when "integer"
-              { type: "integer" }
-            when "number"
-              { type: "number" }
-            when "boolean"
-              { type: "boolean" }
-            when "array"
-              {
-                type: "array",
-                items: convert_property_to_gemini(property_schema[:items] || { type: "string" })
-              }
-            when "object"
-              result = { type: "object" }
-              if property_schema[:properties]
-                result[:properties] = {}
-                property_schema[:properties].each do |nested_prop, nested_schema|
-                  result[:properties][nested_prop] = convert_property_to_gemini(nested_schema)
-                end
-                # Set required fields for nested objects
-                if property_schema[:required]
-                  result[:required] = property_schema[:required].map(&:to_s)
-                end
-              end
-              result
-            else
-              # Default to string for unknown types
-              { type: "string" }
-            end
-          end
-          sig { params(schema: T::Hash[Symbol, T.untyped], current_depth: Integer).returns(Integer) }
-          def self.calculate_depth(schema, current_depth = 0)
-            return current_depth unless schema.is_a?(Hash)
-            max_depth = current_depth
-            # Check properties
-            if schema[:properties].is_a?(Hash)
-              schema[:properties].each_value do |prop|
-                if prop.is_a?(Hash)
-                  prop_depth = calculate_depth(prop, current_depth + 1)
-                  max_depth = [max_depth, prop_depth].max
-                end
-              end
-            end
-            # Check array items
-            if schema[:items].is_a?(Hash)
-              items_depth = calculate_depth(schema[:items], current_depth + 1)
-              max_depth = [max_depth, items_depth].max
-            end
-            max_depth
-          end
-        end
-      end
-    end
-  end
-end

data/lib/dspy/lm/adapters/gemini_adapter.rb DELETED Viewed

@@ -1,220 +0,0 @@
-# frozen_string_literal: true
-require 'gemini-ai'
-require 'json'
-require_relative '../vision_models'
-module DSPy
-  class LM
-    class GeminiAdapter < Adapter
-      def initialize(model:, api_key:, structured_outputs: false)
-        super(model: model, api_key: api_key)
-        validate_api_key!(api_key, 'gemini')
-        @structured_outputs_enabled = structured_outputs
-        # Disable streaming for VCR tests since SSE responses don't record properly
-        # But keep streaming enabled for SSEVCR tests (SSE-specific cassettes)
-        @use_streaming = true
-        begin
-          vcr_active = defined?(VCR) && VCR.current_cassette
-          ssevcr_active = defined?(SSEVCR) && SSEVCR.turned_on?
-          # Only disable streaming if regular VCR is active but SSEVCR is not
-          @use_streaming = false if vcr_active && !ssevcr_active
-        rescue
-          # If VCR/SSEVCR is not available or any error occurs, use streaming
-          @use_streaming = true
-        end
-        @client = Gemini.new(
-          credentials: {
-            service: 'generative-language-api',
-            api_key: api_key,
-            version: 'v1beta'  # Use beta API version for structured outputs support
-          },
-          options: {
-            model: model,
-            server_sent_events: @use_streaming
-          }
-        )
-      end
-      def chat(messages:, signature: nil, **extra_params, &block)
-        normalized_messages = normalize_messages(messages)
-        # Validate vision support if images are present
-        if contains_images?(normalized_messages)
-          VisionModels.validate_vision_support!('gemini', model)
-          # Convert messages to Gemini format with proper image handling
-          normalized_messages = format_multimodal_messages(normalized_messages)
-        end
-        # Convert DSPy message format to Gemini format
-        gemini_messages = convert_messages_to_gemini_format(normalized_messages)
-        request_params = {
-          contents: gemini_messages
-        }.merge(extra_params)
-        begin
-          content = ""
-          final_response_data = nil
-          # Check if we're using streaming or not
-          if @use_streaming
-            # Streaming mode
-            @client.stream_generate_content(request_params) do |chunk|
-              # Handle case where chunk might be a string (from SSE VCR)
-              if chunk.is_a?(String)
-                begin
-                  chunk = JSON.parse(chunk)
-                rescue JSON::ParserError => e
-                  raise AdapterError, "Failed to parse Gemini streaming response: #{e.message}"
-                end
-              end
-              # Extract content from chunks
-              if chunk.dig('candidates', 0, 'content', 'parts')
-                chunk_text = extract_text_from_parts(chunk.dig('candidates', 0, 'content', 'parts'))
-                content += chunk_text
-                # Call block only if provided (for real streaming)
-                block.call(chunk) if block_given?
-              end
-              # Store final response data (usage, metadata) from last chunk
-              if chunk['usageMetadata'] || chunk.dig('candidates', 0, 'finishReason')
-                final_response_data = chunk
-              end
-            end
-          else
-            # Non-streaming mode (for VCR tests)
-            response = @client.generate_content(request_params)
-            # Extract content from single response
-            if response.dig('candidates', 0, 'content', 'parts')
-              content = extract_text_from_parts(response.dig('candidates', 0, 'content', 'parts'))
-            end
-            # Use response as final data
-            final_response_data = response
-          end
-          # Extract usage information from final chunk
-          usage_data = final_response_data&.dig('usageMetadata')
-          usage_struct = usage_data ? UsageFactory.create('gemini', usage_data) : nil
-          # Create metadata from final chunk
-          metadata = {
-            provider: 'gemini',
-            model: model,
-            finish_reason: final_response_data&.dig('candidates', 0, 'finishReason'),
-            safety_ratings: final_response_data&.dig('candidates', 0, 'safetyRatings'),
-            streaming: block_given?
-          }
-          # Create typed metadata
-          typed_metadata = ResponseMetadataFactory.create('gemini', metadata)
-          Response.new(
-            content: content,
-            usage: usage_struct,
-            metadata: typed_metadata
-          )
-        rescue => e
-          handle_gemini_error(e)
-        end
-      end
-      private
-      # Convert DSPy message format to Gemini format
-      def convert_messages_to_gemini_format(messages)
-        # Gemini expects contents array with role and parts
-        messages.map do |msg|
-          role = case msg[:role]
-                 when 'system'
-                   'user' # Gemini doesn't have explicit system role, merge with user
-                 when 'assistant'
-                   'model'
-                 else
-                   msg[:role]
-                 end
-          if msg[:content].is_a?(Array)
-            # Multimodal content
-            parts = msg[:content].map do |item|
-              case item[:type]
-              when 'text'
-                { text: item[:text] }
-              when 'image'
-                item[:image].to_gemini_format
-              else
-                item
-              end
-            end
-            { role: role, parts: parts }
-          else
-            # Text-only content
-            { role: role, parts: [{ text: msg[:content] }] }
-          end
-        end
-      end
-      # Extract text content from Gemini parts array
-      def extract_text_from_parts(parts)
-        return "" unless parts.is_a?(Array)
-        parts.map { |part| part['text'] }.compact.join
-      end
-      # Format multimodal messages for Gemini
-      def format_multimodal_messages(messages)
-        messages.map do |msg|
-          if msg[:content].is_a?(Array)
-            # Convert multimodal content to Gemini format
-            formatted_content = msg[:content].map do |item|
-              case item[:type]
-              when 'text'
-                { type: 'text', text: item[:text] }
-              when 'image'
-                # Validate image compatibility before formatting
-                item[:image].validate_for_provider!('gemini')
-                item[:image].to_gemini_format
-              else
-                item
-              end
-            end
-            {
-              role: msg[:role],
-              content: formatted_content
-            }
-          else
-            msg
-          end
-        end
-      end
-      # Handle Gemini-specific errors
-      def handle_gemini_error(error)
-        error_msg = error.message.to_s
-        if error_msg.include?('API_KEY') || error_msg.include?('status 400') || error_msg.include?('status 401') || error_msg.include?('status 403')
-          raise AdapterError, "Gemini authentication failed: #{error_msg}. Check your API key."
-        elsif error_msg.include?('RATE_LIMIT') || error_msg.downcase.include?('quota') || error_msg.include?('status 429')
-          raise AdapterError, "Gemini rate limit exceeded: #{error_msg}. Please wait and try again."
-        elsif error_msg.include?('SAFETY') || error_msg.include?('blocked')
-          raise AdapterError, "Gemini content was blocked by safety filters: #{error_msg}"
-        elsif error_msg.include?('image') || error_msg.include?('media')
-          raise AdapterError, "Gemini image processing failed: #{error_msg}. Ensure your image is a valid format and under size limits."
-        else
-          # Generic error handling
-          raise AdapterError, "Gemini adapter error: #{error_msg}"
-        end
-      end
-    end
-  end
-end

data/lib/dspy/lm/adapters/ollama_adapter.rb DELETED Viewed

@@ -1,73 +0,0 @@
-# frozen_string_literal: true
-require 'openai'
-module DSPy
-  class LM
-    class OllamaAdapter < OpenAIAdapter
-      DEFAULT_BASE_URL = 'http://localhost:11434/v1'
-      def initialize(model:, api_key: nil, base_url: nil, structured_outputs: true)
-        # Ollama doesn't require API key for local instances
-        # But may need it for remote/protected instances
-        api_key ||= 'ollama' # OpenAI client requires non-empty key
-        base_url ||= DEFAULT_BASE_URL
-        # Store base_url before calling super
-        @base_url = base_url
-        # Don't call parent's initialize, do it manually to control client creation
-        @model = model
-        @api_key = api_key
-        @structured_outputs_enabled = structured_outputs
-        validate_configuration!
-        # Create client with custom base URL
-        @client = OpenAI::Client.new(
-          api_key: @api_key,
-          base_url: @base_url
-        )
-      end
-      def chat(messages:, signature: nil, response_format: nil, &block)
-        # For Ollama, we need to be more lenient with structured outputs
-        # as it may not fully support OpenAI's response_format spec
-        begin
-          super
-        rescue => e
-          # If structured output fails, retry with enhanced prompting
-          if @structured_outputs_enabled && signature && e.message.include?('response_format')
-            DSPy.logger.debug("Ollama structured output failed, falling back to enhanced prompting")
-            @structured_outputs_enabled = false
-            retry
-          else
-            raise
-          end
-        end
-      end
-      private
-      def validate_configuration!
-        super
-        # Additional Ollama-specific validation could go here
-      end
-      def validate_api_key!(api_key, provider)
-        # For Ollama, API key is optional for local instances
-        # Only validate if it looks like a remote URL
-        if @base_url && !@base_url.include?('localhost') && !@base_url.include?('127.0.0.1')
-          super
-        end
-      end
-      # Ollama may have different model support for structured outputs
-      def supports_structured_outputs?
-        # For now, assume all Ollama models support basic JSON mode
-        # but may not support full OpenAI structured output spec
-        true
-      end
-    end
-  end
-end