dspy 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,22 +1,162 @@
+ # typed: strict
  # frozen_string_literal: true

+ require 'sorbet-runtime'
+ require_relative 'predict'
+ require_relative 'signature'
+ require_relative 'instrumentation'
+
  module DSPy
    # Enhances prediction by encouraging step-by-step reasoning
-   # before providing a final answer.
+   # before providing a final answer using Sorbet signatures.
    class ChainOfThought < Predict
+     extend T::Sig
+
+     FieldDescriptor = DSPy::Signature::FieldDescriptor

+     sig { params(signature_class: T.class_of(DSPy::Signature)).void }
      def initialize(signature_class)
-       @signature_class = signature_class
-       chain_of_thought_schema = Dry::Schema.JSON do
-         required(:reasoning).
-           value(:string).
-           meta(description: "Reasoning: Let's think step by step in order to #{signature_class.description}")
+       @original_signature = signature_class
+
+       # Create enhanced output struct with reasoning
+       enhanced_output_struct = create_enhanced_output_struct(signature_class)
+
+       # Create enhanced signature class
+       enhanced_signature = Class.new(DSPy::Signature) do
+         # Set the description
+         description "#{signature_class.description} Think step by step."
+
+         # Use the same input struct and copy field descriptors
+         @input_struct_class = signature_class.input_struct_class
+         @input_field_descriptors = signature_class.instance_variable_get(:@input_field_descriptors) || {}
+
+         # Use the enhanced output struct and create field descriptors for it
+         @output_struct_class = enhanced_output_struct
+
+         # Create field descriptors for the enhanced output struct
+         @output_field_descriptors = {}
+
+         # Copy original output field descriptors
+         original_output_descriptors = signature_class.instance_variable_get(:@output_field_descriptors) || {}
+         @output_field_descriptors.merge!(original_output_descriptors)
+
+         # Add reasoning field descriptor (ChainOfThought always provides this)
+         @output_field_descriptors[:reasoning] = FieldDescriptor.new(String, "Step by step reasoning process")
+
+         class << self
+           attr_reader :input_struct_class, :output_struct_class
+         end
+       end
+
+       # Call parent constructor with enhanced signature
+       super(enhanced_signature)
+       @signature_class = enhanced_signature
+     end
+
+     # Override forward_untyped to add ChainOfThought-specific instrumentation
+     sig { override.params(input_values: T.untyped).returns(T.untyped) }
+     def forward_untyped(**input_values)
+       # Prepare instrumentation payload
+       input_fields = input_values.keys.map(&:to_s)
+
+       # Instrument ChainOfThought lifecycle
+       result = Instrumentation.instrument('dspy.chain_of_thought', {
+         signature_class: @original_signature.name,
+         model: lm.model,
+         provider: lm.provider,
+         input_fields: input_fields
+       }) do
+         # Call parent prediction logic
+         prediction_result = super(**input_values)
+
+         # Analyze reasoning if present
+         if prediction_result.respond_to?(:reasoning) && prediction_result.reasoning
+           reasoning_content = prediction_result.reasoning.to_s
+           reasoning_length = reasoning_content.length
+           reasoning_steps = count_reasoning_steps(reasoning_content)
+
+           # Emit reasoning analysis event
+           Instrumentation.emit('dspy.chain_of_thought.reasoning_complete', {
+             signature_class: @original_signature.name,
+             reasoning_steps: reasoning_steps,
+             reasoning_length: reasoning_length,
+             has_reasoning: !reasoning_content.empty?
+           })
+         end
+
+         prediction_result
+       end
+
+       result
+     end
+
+     private
+
+     # Count reasoning steps by looking for step indicators
+     def count_reasoning_steps(reasoning_text)
+       return 0 if reasoning_text.nil? || reasoning_text.empty?
+
+       # Look for common step patterns
+       step_patterns = [
+         /step \d+/i,
+         /\d+\./,
+         /first|second|third|then|next|finally/i,
+         /\n\s*-/
+       ]
+
+       max_count = 0
+       step_patterns.each do |pattern|
+         count = reasoning_text.scan(pattern).length
+         max_count = [max_count, count].max
+       end
+
+       # Fallback: count sentences if no clear steps
+       max_count > 0 ? max_count : reasoning_text.split(/[.!?]+/).reject(&:empty?).length
+     end
+
+     sig { params(signature_class: T.class_of(DSPy::Signature)).returns(T.class_of(T::Struct)) }
+     def create_enhanced_output_struct(signature_class)
+       # Get original output props
+       original_props = signature_class.output_struct_class.props
+
+       # Create new struct class with reasoning added
+       Class.new(T::Struct) do
+         # Add all original fields
+         original_props.each do |name, prop|
+           # Extract the type and other options
+           type = prop[:type]
+           options = prop.except(:type, :type_object, :accessor_key, :sensitivity, :redaction)
+
+           # Handle default values
+           if options[:default]
+             const name, type, default: options[:default]
+           elsif options[:factory]
+             const name, type, factory: options[:factory]
+           else
+             const name, type
+           end
+         end
+
+         # Add reasoning field (ChainOfThought always provides this)
+         const :reasoning, String
+
+         # Add to_h method to serialize the struct to a hash
+         define_method :to_h do
+           hash = {}
+
+           # Start with input values if available
+           if self.instance_variable_defined?(:@input_values)
+             hash.merge!(self.instance_variable_get(:@input_values))
+           end
+
+           # Then add output properties
+           self.class.props.keys.each do |key|
+             hash[key] = self.send(key)
+           end
+
+           hash
+         end
        end
-       @signature_class.output_schema = Dry::Schema.JSON(parent:
-         [
-           @signature_class.output_schema,
-           chain_of_thought_schema
-         ])
      end
    end
  end
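
In practice the rewrite above means ChainOfThought no longer patches a Dry::Schema onto the signature; it wraps the original signature in an enhanced one whose output struct carries a reasoning field, and instruments the call. A minimal usage sketch (the QA signature, the configured language model, and the call entry point inherited from Predict are assumptions about the surrounding API, not part of this diff):

    # QA is a hypothetical DSPy::Signature subclass defined elsewhere.
    cot = DSPy::ChainOfThought.new(QA)
    result = cot.call(question: "What is 2 + 2?")

    result.reasoning   # step-by-step text from the extra :reasoning field
    result.to_h        # inputs merged with outputs, including :reasoning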

@@ -0,0 +1,54 @@
+ # frozen_string_literal: true
+
+ module DSPy
+   module Instrumentation
+     # Utility for extracting token usage from different LM adapters
+     # Uses actual token counts from API responses for accuracy
+     module TokenTracker
+       extend self
+
+       # Extract actual token usage from API responses
+       def extract_token_usage(response, provider)
+         case provider.to_s.downcase
+         when 'openai'
+           extract_openai_tokens(response)
+         when 'anthropic'
+           extract_anthropic_tokens(response)
+         else
+           {} # No token information for other providers
+         end
+       end
+
+       private
+
+       def extract_openai_tokens(response)
+         return {} unless response&.usage
+
+         usage = response.usage
+         return {} unless usage.is_a?(Hash)
+
+         {
+           tokens_input: usage[:prompt_tokens] || usage['prompt_tokens'],
+           tokens_output: usage[:completion_tokens] || usage['completion_tokens'],
+           tokens_total: usage[:total_tokens] || usage['total_tokens']
+         }
+       end
+
+       def extract_anthropic_tokens(response)
+         return {} unless response&.usage
+
+         usage = response.usage
+         return {} unless usage.is_a?(Hash)
+
+         input_tokens = usage[:input_tokens] || usage['input_tokens'] || 0
+         output_tokens = usage[:output_tokens] || usage['output_tokens'] || 0
+
+         {
+           tokens_input: input_tokens,
+           tokens_output: output_tokens,
+           tokens_total: input_tokens + output_tokens
+         }
+       end
+     end
+   end
+ end
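
TokenTracker only needs a response object exposing a usage hash with the provider's native token fields. A minimal sketch (FakeResponse is a stand-in for a real API response, not part of the gem):

    FakeResponse = Struct.new(:usage)

    openai_like = FakeResponse.new({ prompt_tokens: 120, completion_tokens: 45, total_tokens: 165 })
    DSPy::Instrumentation::TokenTracker.extract_token_usage(openai_like, 'openai')
    # => { tokens_input: 120, tokens_output: 45, tokens_total: 165 }

    # Providers other than openai/anthropic fall through to an empty hash.
    DSPy::Instrumentation::TokenTracker.extract_token_usage(openai_like, 'ollama')
    # => {}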

@@ -0,0 +1,100 @@
+ # frozen_string_literal: true
+
+ require 'dry-monitor'
+ require 'dry-configurable'
+
+ module DSPy
+   # Core instrumentation module using dry-monitor for event emission
+   # Provides extension points for logging, Langfuse, New Relic, and custom monitoring
+   module Instrumentation
+
+     def self.notifications
+       @notifications ||= Dry::Monitor::Notifications.new(:dspy).tap do |n|
+         # Register all DSPy events
+         n.register_event('dspy.lm.request')
+         n.register_event('dspy.lm.tokens')
+         n.register_event('dspy.lm.response.parsed')
+         n.register_event('dspy.predict')
+         n.register_event('dspy.predict.validation_error')
+         n.register_event('dspy.chain_of_thought')
+         n.register_event('dspy.chain_of_thought.reasoning_step')
+         n.register_event('dspy.react')
+         n.register_event('dspy.react.tool_call')
+         n.register_event('dspy.react.iteration_complete')
+         n.register_event('dspy.react.max_iterations')
+       end
+     end
+
+     # High-precision timing for performance tracking
+     def self.instrument(event_name, payload = {}, &block)
+       # If no block is given, return early
+       return unless block_given?
+
+       start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+       start_cpu = Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID)
+
+       begin
+         result = yield
+
+         end_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+         end_cpu = Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID)
+
+         enhanced_payload = payload.merge(
+           duration_ms: ((end_time - start_time) * 1000).round(2),
+           cpu_time_ms: ((end_cpu - start_cpu) * 1000).round(2),
+           status: 'success',
+           timestamp: Time.now.iso8601
+         )
+
+         self.emit_event(event_name, enhanced_payload)
+         result
+       rescue => error
+         end_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+         end_cpu = Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID)
+
+         error_payload = payload.merge(
+           duration_ms: ((end_time - start_time) * 1000).round(2),
+           cpu_time_ms: ((end_cpu - start_cpu) * 1000).round(2),
+           status: 'error',
+           error_type: error.class.name,
+           error_message: error.message,
+           timestamp: Time.now.iso8601
+         )
+
+         self.emit_event(event_name, error_payload)
+         raise
+       end
+     end
+
+     # Emit event without timing (for discrete events)
+     def self.emit(event_name, payload = {})
+       enhanced_payload = payload.merge(
+         timestamp: Time.now.iso8601,
+         status: payload[:status] || 'success'
+       )
+
+       self.emit_event(event_name, enhanced_payload)
+     end
+
+     # Register additional events dynamically (useful for testing)
+     def self.register_event(event_name)
+       notifications.register_event(event_name)
+     end
+
+     # Subscribe to DSPy instrumentation events
+     def self.subscribe(event_pattern = nil, &block)
+       if event_pattern
+         notifications.subscribe(event_pattern, &block)
+       else
+         # Subscribe to all DSPy events
+         %w[dspy.lm.request dspy.lm.tokens dspy.lm.response.parsed dspy.predict dspy.predict.validation_error dspy.chain_of_thought dspy.chain_of_thought.reasoning_step dspy.react dspy.react.tool_call dspy.react.iteration_complete dspy.react.max_iterations].each do |event_name|
+           notifications.subscribe(event_name, &block)
+         end
+       end
+     end
+
+     def self.emit_event(event_name, payload)
+       notifications.instrument(event_name, payload)
+     end
+   end
+ end
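
A sketch of how a consumer might hook into these events. The event object yielded to the block comes from dry-monitor's notifications, and the logging shown is illustrative rather than part of the gem:

    # Listen for a single event by name...
    DSPy::Instrumentation.subscribe('dspy.lm.request') do |event|
      puts "#{event.id}: #{event.payload[:status]} in #{event.payload[:duration_ms]}ms"
    end

    # ...time an arbitrary block (duration_ms, cpu_time_ms and status get merged in)...
    DSPy::Instrumentation.instrument('dspy.predict', { signature_class: 'QA' }) do
      # work to be timed
    end

    # ...or emit a discrete, untimed event.
    DSPy::Instrumentation.emit('dspy.predict.validation_error', { signature_class: 'QA' })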

@@ -0,0 +1,41 @@
+ # frozen_string_literal: true
+
+ module DSPy
+   class LM
+     # Base adapter interface for all LM providers
+     class Adapter
+       attr_reader :model, :api_key
+
+       def initialize(model:, api_key:)
+         @model = model
+         @api_key = api_key
+         validate_configuration!
+       end
+
+       # Chat interface that all adapters must implement
+       # @param messages [Array<Hash>] Array of message hashes with :role and :content
+       # @param block [Proc] Optional streaming block
+       # @return [DSPy::LM::Response] Normalized response
+       def chat(messages:, &block)
+         raise NotImplementedError, "Subclasses must implement #chat method"
+       end
+
+       private
+
+       def validate_configuration!
+         raise ConfigurationError, "Model is required" if model.nil? || model.empty?
+         raise ConfigurationError, "API key is required" if api_key.nil? || api_key.empty?
+       end
+
+       # Helper method to normalize message format
+       def normalize_messages(messages)
+         messages.map do |msg|
+           {
+             role: msg[:role].to_s,
+             content: msg[:content].to_s
+           }
+         end
+       end
+     end
+   end
+ end
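
Since chat is the only method a subclass must supply, a provider integration can be as small as the sketch below (EchoAdapter is a made-up example, not shipped with the gem):

    class EchoAdapter < DSPy::LM::Adapter
      def chat(messages:, &block)
        # Echo the last message back as a normalized Response.
        last = normalize_messages(messages).last
        DSPy::LM::Response.new(
          content: last[:content],
          metadata: { provider: 'echo', model: model }
        )
      end
    end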

@@ -0,0 +1,59 @@
+ # frozen_string_literal: true
+
+ module DSPy
+   class LM
+     # Factory for creating appropriate adapters based on model_id
+     class AdapterFactory
+       # Maps provider prefixes to adapter classes
+       ADAPTER_MAP = {
+         'openai' => 'OpenAIAdapter',
+         'anthropic' => 'AnthropicAdapter',
+         'ruby_llm' => 'RubyLLMAdapter'
+       }.freeze
+
+       class << self
+         # Creates an adapter instance based on model_id
+         # @param model_id [String] Full model identifier (e.g., "openai/gpt-4")
+         # @param api_key [String] API key for the provider
+         # @return [DSPy::LM::Adapter] Appropriate adapter instance
+         def create(model_id, api_key:)
+           provider, model = parse_model_id(model_id)
+           adapter_class = get_adapter_class(provider)
+
+           adapter_class.new(model: model, api_key: api_key)
+         end
+
+         private
+
+         # Parse model_id to determine provider and model
+         def parse_model_id(model_id)
+           if model_id.include?('/')
+             provider, model = model_id.split('/', 2)
+             [provider, model]
+           else
+             # Legacy format: assume ruby_llm for backward compatibility
+             ['ruby_llm', model_id]
+           end
+         end
+
+         def get_adapter_class(provider)
+           adapter_class_name = ADAPTER_MAP[provider]
+
+           unless adapter_class_name
+             available_providers = ADAPTER_MAP.keys.join(', ')
+             raise UnsupportedProviderError,
+                   "Unsupported provider: #{provider}. Available: #{available_providers}"
+           end
+
+           begin
+             Object.const_get("DSPy::LM::#{adapter_class_name}")
+           rescue NameError
+             raise UnsupportedProviderError,
+                   "Adapter not found: DSPy::LM::#{adapter_class_name}. " \
+                   "Make sure the corresponding gem is installed."
+           end
+         end
+       end
+     end
+   end
+ end
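
Illustrative calls, with placeholder API keys. The provider prefix before the slash selects the adapter; a bare model id keeps the pre-0.3.0 ruby_llm behaviour:

    DSPy::LM::AdapterFactory.create('openai/gpt-4', api_key: ENV['OPENAI_API_KEY'])
    # => DSPy::LM::OpenAIAdapter for model "gpt-4"

    DSPy::LM::AdapterFactory.create('anthropic/claude-3-haiku', api_key: ENV['ANTHROPIC_API_KEY'])
    # => DSPy::LM::AnthropicAdapter for model "claude-3-haiku"

    DSPy::LM::AdapterFactory.create('gpt-4', api_key: ENV['OPENAI_API_KEY'])
    # no "provider/" prefix, so the legacy path falls back to RubyLLMAdapter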

@@ -0,0 +1,96 @@
+ # frozen_string_literal: true
+
+ require 'anthropic'
+
+ module DSPy
+   class LM
+     class AnthropicAdapter < Adapter
+       def initialize(model:, api_key:)
+         super
+         @client = Anthropic::Client.new(api_key: api_key)
+       end
+
+       def chat(messages:, &block)
+         # Anthropic requires system message to be separate from messages
+         system_message, user_messages = extract_system_message(normalize_messages(messages))
+
+         request_params = {
+           model: model,
+           messages: user_messages,
+           max_tokens: 4096, # Required for Anthropic
+           temperature: 0.0 # DSPy default for deterministic responses
+         }
+
+         # Add system message if present
+         request_params[:system] = system_message if system_message
+
+         # Add streaming if block provided
+         if block_given?
+           request_params[:stream] = true
+         end
+
+         begin
+           if block_given?
+             content = ""
+             @client.messages.stream(**request_params) do |chunk|
+               if chunk.respond_to?(:delta) && chunk.delta.respond_to?(:text)
+                 chunk_text = chunk.delta.text
+                 content += chunk_text
+                 block.call(chunk)
+               end
+             end
+
+             Response.new(
+               content: content,
+               usage: nil, # Usage not available in streaming
+               metadata: {
+                 provider: 'anthropic',
+                 model: model,
+                 streaming: true
+               }
+             )
+           else
+             response = @client.messages.create(**request_params)
+
+             if response.respond_to?(:error) && response.error
+               raise AdapterError, "Anthropic API error: #{response.error}"
+             end
+
+             content = response.content.first.text if response.content.is_a?(Array) && response.content.first
+             usage = response.usage
+
+             Response.new(
+               content: content,
+               usage: usage.respond_to?(:to_h) ? usage.to_h : usage,
+               metadata: {
+                 provider: 'anthropic',
+                 model: model,
+                 response_id: response.id,
+                 role: response.role
+               }
+             )
+           end
+         rescue => e
+           raise AdapterError, "Anthropic adapter error: #{e.message}"
+         end
+       end
+
+       private
+
+       def extract_system_message(messages)
+         system_message = nil
+         user_messages = []
+
+         messages.each do |msg|
+           if msg[:role] == 'system'
+             system_message = msg[:content]
+           else
+             user_messages << msg
+           end
+         end
+
+         [system_message, user_messages]
+       end
+     end
+   end
+ end
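
A direct adapter call looks like the sketch below; normally the higher-level DSPy::LM drives this, and the model id and expected output are only examples:

    adapter = DSPy::LM::AnthropicAdapter.new(
      model: 'claude-3-5-sonnet-20241022',
      api_key: ENV['ANTHROPIC_API_KEY']
    )
    response = adapter.chat(messages: [
      { role: 'system', content: 'Answer in one word.' },
      { role: 'user', content: 'What is the capital of France?' }
    ])
    response.content  # => "Paris" (the system prompt is passed separately, per the Anthropic API)
    response.usage    # input/output token counts straight from the API response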

@@ -0,0 +1,53 @@
+ # frozen_string_literal: true
+
+ require 'openai'
+
+ module DSPy
+   class LM
+     class OpenAIAdapter < Adapter
+       def initialize(model:, api_key:)
+         super
+         @client = OpenAI::Client.new(api_key: api_key)
+       end
+
+       def chat(messages:, &block)
+         request_params = {
+           model: model,
+           messages: normalize_messages(messages),
+           temperature: 0.0 # DSPy default for deterministic responses
+         }
+
+         # Add streaming if block provided
+         if block_given?
+           request_params[:stream] = proc do |chunk, _bytesize|
+             block.call(chunk) if chunk.dig("choices", 0, "delta", "content")
+           end
+         end
+
+         begin
+           response = @client.chat.completions.create(**request_params)
+
+           if response.respond_to?(:error) && response.error
+             raise AdapterError, "OpenAI API error: #{response.error}"
+           end
+
+           content = response.choices.first.message.content
+           usage = response.usage
+
+           Response.new(
+             content: content,
+             usage: usage.respond_to?(:to_h) ? usage.to_h : usage,
+             metadata: {
+               provider: 'openai',
+               model: model,
+               response_id: response.id,
+               created: response.created
+             }
+           )
+         rescue => e
+           raise AdapterError, "OpenAI adapter error: #{e.message}"
+         end
+       end
+     end
+   end
+ end

@@ -0,0 +1,81 @@
+ # frozen_string_literal: true
+
+ begin
+   require 'ruby_llm'
+ rescue LoadError
+   # ruby_llm is optional for backward compatibility
+ end
+
+ module DSPy
+   class LM
+     class RubyLLMAdapter < Adapter
+       def initialize(model:, api_key:)
+         super
+
+         unless defined?(RubyLLM)
+           raise ConfigurationError,
+                 "ruby_llm gem is required for RubyLLMAdapter. " \
+                 "Add 'gem \"ruby_llm\"' to your Gemfile."
+         end
+
+         configure_ruby_llm
+       end
+
+       def chat(messages:, &block)
+         begin
+           chat = RubyLLM.chat(model: model)
+
+           # Add messages to chat
+           messages.each do |msg|
+             chat.add_message(role: msg[:role].to_sym, content: msg[:content])
+           end
+
+           # Get the last user message for ask method
+           last_user_message = messages.reverse.find { |msg| msg[:role] == 'user' }
+
+           if last_user_message
+             # Remove the last user message since ask() will add it
+             chat.messages.pop if chat.messages.last&.content == last_user_message[:content]
+             chat.ask(last_user_message[:content], &block)
+           else
+             raise AdapterError, "No user message found in conversation"
+           end
+
+           content = chat.messages.last&.content || ""
+
+           Response.new(
+             content: content,
+             usage: nil, # ruby_llm doesn't provide usage info
+             metadata: {
+               provider: 'ruby_llm',
+               model: model,
+               message_count: chat.messages.length
+             }
+           )
+         rescue => e
+           raise AdapterError, "RubyLLM adapter error: #{e.message}"
+         end
+       end
+
+       private
+
+       def configure_ruby_llm
+         # Determine provider from model for configuration
+         if model.include?('gpt') || model.include?('openai')
+           RubyLLM.configure do |config|
+             config.openai_api_key = api_key
+           end
+         elsif model.include?('claude') || model.include?('anthropic')
+           RubyLLM.configure do |config|
+             config.anthropic_api_key = api_key
+           end
+         else
+           # Default to OpenAI configuration
+           RubyLLM.configure do |config|
+             config.openai_api_key = api_key
+           end
+         end
+       end
+     end
+   end
+ end

@@ -0,0 +1,10 @@
+ # frozen_string_literal: true
+
+ module DSPy
+   class LM
+     class Error < StandardError; end
+     class AdapterError < Error; end
+     class UnsupportedProviderError < Error; end
+     class ConfigurationError < Error; end
+   end
+ end

@@ -0,0 +1,28 @@
+ # frozen_string_literal: true
+
+ module DSPy
+   class LM
+     # Normalized response format for all LM providers
+     class Response
+       attr_reader :content, :usage, :metadata
+
+       def initialize(content:, usage: nil, metadata: {})
+         @content = content
+         @usage = usage
+         @metadata = metadata
+       end
+
+       def to_s
+         content
+       end
+
+       def to_h
+         {
+           content: content,
+           usage: usage,
+           metadata: metadata
+         }
+       end
+     end
+   end
+ end
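
Response is the one shape every adapter returns, so downstream response parsing and token tracking only ever deal with content, usage and metadata. A tiny sketch:

    r = DSPy::LM::Response.new(
      content: '{"answer":"4"}',
      usage: { prompt_tokens: 12, completion_tokens: 3, total_tokens: 15 },
      metadata: { provider: 'openai', model: 'gpt-4' }
    )
    r.to_s  # => '{"answer":"4"}'
    r.to_h  # => { content: '{"answer":"4"}', usage: {...}, metadata: {...} }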