RubyGems - dspy - Versions diffs - 0.3.1 → 0.5.0 - Mend

dspy 0.3.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

checksums.yaml +4 -4
data/README.md +67 -385
data/lib/dspy/chain_of_thought.rb +123 -86
data/lib/dspy/evaluate.rb +554 -0
data/lib/dspy/example.rb +203 -0
data/lib/dspy/few_shot_example.rb +81 -0
data/lib/dspy/instrumentation/token_tracker.rb +6 -6
data/lib/dspy/instrumentation.rb +199 -18
data/lib/dspy/lm/adapter_factory.rb +6 -8
data/lib/dspy/lm.rb +79 -35
data/lib/dspy/mixins/instrumentation_helpers.rb +133 -0
data/lib/dspy/mixins/struct_builder.rb +133 -0
data/lib/dspy/mixins/type_coercion.rb +67 -0
data/lib/dspy/predict.rb +83 -128
data/lib/dspy/prompt.rb +222 -0
data/lib/dspy/propose/grounded_proposer.rb +560 -0
data/lib/dspy/re_act.rb +242 -173
data/lib/dspy/registry/registry_manager.rb +504 -0
data/lib/dspy/registry/signature_registry.rb +725 -0
data/lib/dspy/storage/program_storage.rb +442 -0
data/lib/dspy/storage/storage_manager.rb +331 -0
data/lib/dspy/subscribers/langfuse_subscriber.rb +669 -0
data/lib/dspy/subscribers/logger_subscriber.rb +180 -5
data/lib/dspy/subscribers/newrelic_subscriber.rb +686 -0
data/lib/dspy/subscribers/otel_subscriber.rb +538 -0
data/lib/dspy/teleprompt/data_handler.rb +107 -0
data/lib/dspy/teleprompt/mipro_v2.rb +790 -0
data/lib/dspy/teleprompt/simple_optimizer.rb +497 -0
data/lib/dspy/teleprompt/teleprompter.rb +336 -0
data/lib/dspy/teleprompt/utils.rb +380 -0
data/lib/dspy/version.rb +5 -0
data/lib/dspy.rb +105 -0
metadata +32 -12
data/lib/dspy/lm/adapters/ruby_llm_adapter.rb +0 -81

data/lib/dspy/lm.rb CHANGED

@@ -13,7 +13,6 @@ require_relative 'instrumentation/token_tracker'
 # Load adapters
 require_relative 'lm/adapters/openai_adapter'
 require_relative 'lm/adapters/anthropic_adapter'
-require_relative 'lm/adapters/ruby_llm_adapter'
 module DSPy
   class LM
@@ -40,38 +39,52 @@ module DSPy
       input_text = messages.map { |m| m[:content] }.join(' ')
       input_size = input_text.length
-      # Instrument LM request
-      response = Instrumentation.instrument('dspy.lm.request', {
-        gen_ai_operation_name: 'chat',
-        gen_ai_system: provider,
-        gen_ai_request_model: model,
-        signature_class: signature_class.name,
-        provider: provider,
-        adapter_class: adapter.class.name,
-        input_size: input_size
-      }) do
-        adapter.chat(messages: messages, &block)
-      end
+      # Check trace level to decide instrumentation strategy
+      trace_level = DSPy.config.instrumentation.trace_level
-      # Extract actual token usage from response (more accurate than estimation)
-      token_usage = Instrumentation::TokenTracker.extract_token_usage(response, provider)
+      # Extract token usage and prepare consolidated payload
+      response = nil
+      token_usage = {}
-      # Emit token usage event if available
-      if token_usage.any?
-        Instrumentation.emit('dspy.lm.tokens', token_usage.merge({
+      if should_emit_lm_events?(trace_level)
+        # Detailed mode: emit all LM events as before
+        response = Instrumentation.instrument('dspy.lm.request', {
+          gen_ai_operation_name: 'chat',
           gen_ai_system: provider,
           gen_ai_request_model: model,
-          signature_class: signature_class.name
-        }))
-      end
-      # Instrument response parsing
-      parsed_result = Instrumentation.instrument('dspy.lm.response.parsed', {
-        signature_class: signature_class.name,
-        provider: provider,
-        response_length: response.content&.length || 0
-      }) do
-        parse_response(response, input_values, signature_class)
+          signature_class: signature_class.name,
+          provider: provider,
+          adapter_class: adapter.class.name,
+          input_size: input_size
+        }) do
+          adapter.chat(messages: messages, &block)
+        end
+        # Extract actual token usage from response (more accurate than estimation)
+        token_usage = Instrumentation::TokenTracker.extract_token_usage(response, provider)
+        # Emit token usage event if available
+        if token_usage.any?
+          Instrumentation.emit('dspy.lm.tokens', token_usage.merge({
+            gen_ai_system: provider,
+            gen_ai_request_model: model,
+            signature_class: signature_class.name
+          }))
+        end
+        # Instrument response parsing
+        parsed_result = Instrumentation.instrument('dspy.lm.response.parsed', {
+          signature_class: signature_class.name,
+          provider: provider,
+          response_length: response.content&.length || 0
+        }) do
+          parse_response(response, input_values, signature_class)
+        end
+      else
+        # Consolidated mode: execute without nested instrumentation
+        response = adapter.chat(messages: messages, &block)
+        token_usage = Instrumentation::TokenTracker.extract_token_usage(response, provider)
+        parsed_result = parse_response(response, input_values, signature_class)
       end
       parsed_result
@@ -79,16 +92,47 @@ module DSPy
     private
-    def parse_model_id(model_id)
-      if model_id.include?('/')
-        provider, model = model_id.split('/', 2)
-        [provider, model]
+    # Determines if LM-level events should be emitted based on trace level
+    def should_emit_lm_events?(trace_level)
+      case trace_level
+      when :minimal
+        false  # Never emit LM events in minimal mode
+      when :standard
+        # In standard mode, emit LM events only if we're not in a nested context
+        !is_nested_context?
+      when :detailed
+        true  # Always emit LM events in detailed mode
       else
-        # Legacy format: assume ruby_llm for backward compatibility
-        ['ruby_llm', model_id]
+        true
       end
     end
+    # Determines if we're in a nested context where higher-level events are being emitted
+    def is_nested_context?
+      caller_locations = caller_locations(1, 30)
+      return false if caller_locations.nil?
+      # Look for higher-level DSPy modules in the call stack
+      # We consider ChainOfThought and ReAct as higher-level modules
+      higher_level_modules = caller_locations.select do |loc|
+        loc.path.include?('chain_of_thought') ||
+        loc.path.include?('re_act') ||
+        loc.path.include?('react')
+      end
+      # If we have higher-level modules in the call stack, we're in a nested context
+      higher_level_modules.any?
+    end
+    def parse_model_id(model_id)
+      unless model_id.include?('/')
+        raise ArgumentError, "model_id must include provider (e.g., 'openai/gpt-4', 'anthropic/claude-3'). Legacy format without provider is no longer supported."
+      end
+      provider, model = model_id.split('/', 2)
+      [provider, model]
+    end
     def build_messages(inference_module, input_values)
       messages = []

data/lib/dspy/mixins/instrumentation_helpers.rb ADDED

@@ -0,0 +1,133 @@
+# typed: strict
+# frozen_string_literal: true
+require 'sorbet-runtime'
+require_relative '../instrumentation'
+module DSPy
+  module Mixins
+    # Shared instrumentation helper methods for DSPy modules
+    module InstrumentationHelpers
+      extend T::Sig
+      private
+      # Prepares base instrumentation payload for prediction-based modules
+      sig { params(signature_class: T.class_of(DSPy::Signature), input_values: T::Hash[Symbol, T.untyped]).returns(T::Hash[Symbol, T.untyped]) }
+      def prepare_base_instrumentation_payload(signature_class, input_values)
+        {
+          signature_class: signature_class.name,
+          model: lm.model,
+          provider: lm.provider,
+          input_fields: input_values.keys.map(&:to_s)
+        }
+      end
+      # Instruments a prediction operation with base payload
+      sig { params(event_name: String, signature_class: T.class_of(DSPy::Signature), input_values: T::Hash[Symbol, T.untyped], additional_payload: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
+      def instrument_prediction(event_name, signature_class, input_values, additional_payload = {})
+        base_payload = prepare_base_instrumentation_payload(signature_class, input_values)
+        full_payload = base_payload.merge(additional_payload)
+        # Check if we should emit this event based on trace level
+        trace_level = DSPy.config.instrumentation.trace_level
+        if should_emit_event?(event_name, trace_level)
+          Instrumentation.instrument(event_name, full_payload) do
+            yield
+          end
+        else
+          # Skip instrumentation, just execute the block
+          yield
+        end
+      end
+      # Emits a validation error event
+      sig { params(signature_class: T.class_of(DSPy::Signature), validation_type: String, error_message: String).void }
+      def emit_validation_error(signature_class, validation_type, error_message)
+        Instrumentation.emit('dspy.prediction.validation_error', {
+          signature_class: signature_class.name,
+          validation_type: validation_type,
+          validation_errors: { validation_type.to_sym => error_message }
+        })
+      end
+      # Emits a prediction completion event
+      sig { params(signature_class: T.class_of(DSPy::Signature), success: T::Boolean, additional_data: T::Hash[Symbol, T.untyped]).void }
+      def emit_prediction_complete(signature_class, success, additional_data = {})
+        Instrumentation.emit('dspy.prediction.complete', {
+          signature_class: signature_class.name,
+          success: success
+        }.merge(additional_data))
+      end
+      # Determines if an event should be emitted based on trace level
+      sig { params(event_name: String, trace_level: Symbol).returns(T::Boolean) }
+      def should_emit_event?(event_name, trace_level)
+        case trace_level
+        when :minimal
+          # Only emit the highest-level events (chain_of_thought, react, etc.)
+          event_name.match?(/^dspy\.(chain_of_thought|react)$/)
+        when :standard
+          # Emit consolidated events - skip nested events when a higher-level event is being emitted
+          # This is the key change: detect if we're in a nested context and skip lower-level events
+          if is_nested_context?
+            # If we're in a nested context, only emit higher-level events
+            event_name.match?(/^dspy\.(chain_of_thought|react)$/)
+          else
+            # If we're not in a nested context, emit all events normally
+            true
+          end
+        when :detailed
+          # Emit all events with additional correlation information
+          true
+        else
+          true
+        end
+      end
+      # Determines if this is a top-level event (not nested)
+      sig { params(event_name: String).returns(T::Boolean) }
+      def is_top_level_event?(event_name)
+        # Check if we're in a nested call by looking at the call stack
+        caller_locations = caller_locations(1, 20)
+        return false if caller_locations.nil?
+        # Look for other instrumentation calls in the stack
+        instrumentation_calls = caller_locations.select do |loc|
+          loc.label.include?('instrument_prediction') ||
+          loc.label.include?('instrument') ||
+          loc.path.include?('instrumentation')
+        end
+        # If we have more than one instrumentation call, this is nested
+        instrumentation_calls.size <= 1
+      end
+      # Determines if we're in a nested call context
+      sig { returns(T::Boolean) }
+      def is_nested_call?
+        !is_top_level_event?('')
+      end
+      # Determines if we're in a nested context where higher-level events are being emitted
+      sig { returns(T::Boolean) }
+      def is_nested_context?
+        caller_locations = caller_locations(1, 30)
+        return false if caller_locations.nil?
+        # Look for higher-level DSPy modules in the call stack
+        # We consider ChainOfThought and ReAct as higher-level modules
+        higher_level_modules = caller_locations.select do |loc|
+          loc.path.include?('chain_of_thought') ||
+          loc.path.include?('re_act') ||
+          loc.path.include?('react')
+        end
+        # If we have higher-level modules in the call stack, we're in a nested context
+        higher_level_modules.any?
+      end
+    end
+  end
+end

data/lib/dspy/mixins/struct_builder.rb ADDED

@@ -0,0 +1,133 @@
+# typed: strict
+# frozen_string_literal: true
+require 'sorbet-runtime'
+module DSPy
+  module Mixins
+    # Shared module for building enhanced structs with input/output properties
+    module StructBuilder
+      extend T::Sig
+      private
+      # Builds a new struct class with properties from multiple sources
+      sig { params(property_sources: T::Hash[Symbol, T::Hash[Symbol, T.untyped]], additional_fields: T::Hash[Symbol, T.untyped]).returns(T.class_of(T::Struct)) }
+      def build_enhanced_struct(property_sources, additional_fields = {})
+        # Capture self to access methods from within the class block
+        builder = self
+        Class.new(T::Struct) do
+          extend T::Sig
+          # Add properties from each source
+          property_sources.each do |_source_name, props|
+            props.each do |name, prop|
+              type = builder.send(:extract_type_from_prop, prop)
+              options = builder.send(:extract_options_from_prop, prop)
+              if options[:default]
+                const name, type, default: options[:default]
+              elsif options[:factory]
+                const name, type, factory: options[:factory]
+              else
+                const name, type
+              end
+            end
+          end
+          # Add additional fields specific to the enhanced struct
+          additional_fields.each do |name, field_config|
+            type = builder.send(:extract_type_from_prop, field_config)
+            options = builder.send(:extract_options_from_prop, field_config)
+            if options[:default]
+              const name, type, default: options[:default]
+            elsif options[:factory]
+              const name, type, factory: options[:factory]
+            else
+              const name, type
+            end
+          end
+          include StructSerialization
+        end
+      end
+      # Builds properties from a props hash (from T::Struct.props)
+      sig { params(props: T::Hash[Symbol, T.untyped]).void }
+      def build_properties_from_hash(props)
+        props.each { |name, prop| build_single_property(name, prop) }
+      end
+      # Builds a single property with type and options
+      sig { params(name: Symbol, prop: T.untyped).void }
+      def build_single_property(name, prop)
+        type = extract_type_from_prop(prop)
+        options = extract_options_from_prop(prop)
+        if options[:default]
+          const name, type, default: options[:default]
+        elsif options[:factory]
+          const name, type, factory: options[:factory]
+        else
+          const name, type
+        end
+      end
+      # Extracts type from property configuration
+      sig { params(prop: T.untyped).returns(T.untyped) }
+      def extract_type_from_prop(prop)
+        case prop
+        when Hash
+          prop[:type]
+        when Array
+          # Handle [Type, description] format
+          prop.first
+        else
+          prop
+        end
+      end
+      # Extracts options from property configuration
+      sig { params(prop: T.untyped).returns(T::Hash[Symbol, T.untyped]) }
+      def extract_options_from_prop(prop)
+        case prop
+        when Hash
+          prop.except(:type, :type_object, :accessor_key, :sensitivity, :redaction)
+        else
+          {}
+        end
+      end
+    end
+    # Module for adding serialization capabilities to enhanced structs
+    module StructSerialization
+      extend T::Sig
+      sig { returns(T::Hash[Symbol, T.untyped]) }
+      def to_h
+        hash = input_values_hash
+        hash.merge(output_properties_hash)
+      end
+      private
+      sig { returns(T::Hash[Symbol, T.untyped]) }
+      def input_values_hash
+        if instance_variable_defined?(:@input_values)
+          instance_variable_get(:@input_values) || {}
+        else
+          {}
+        end
+      end
+      sig { returns(T::Hash[Symbol, T.untyped]) }
+      def output_properties_hash
+        self.class.props.keys.each_with_object({}) do |key, hash|
+          hash[key] = send(key)
+        end
+      end
+    end
+  end
+end

data/lib/dspy/mixins/type_coercion.rb ADDED

@@ -0,0 +1,67 @@
+# typed: strict
+# frozen_string_literal: true
+require 'sorbet-runtime'
+module DSPy
+  module Mixins
+    # Shared module for type coercion logic across DSPy modules
+    module TypeCoercion
+      extend T::Sig
+      private
+      # Coerces output attributes to match their expected types
+      sig { params(output_attributes: T::Hash[Symbol, T.untyped], output_props: T::Hash[Symbol, T.untyped]).returns(T::Hash[Symbol, T.untyped]) }
+      def coerce_output_attributes(output_attributes, output_props)
+        output_attributes.map do |key, value|
+          prop_type = output_props[key]&.dig(:type)
+          coerced_value = coerce_value_to_type(value, prop_type)
+          [key, coerced_value]
+        end.to_h
+      end
+      # Coerces a single value to match its expected type
+      sig { params(value: T.untyped, prop_type: T.untyped).returns(T.untyped) }
+      def coerce_value_to_type(value, prop_type)
+        return value unless prop_type
+        case prop_type
+        when ->(type) { enum_type?(type) }
+          extract_enum_class(prop_type).deserialize(value)
+        when Float, ->(type) { simple_type_match?(type, Float) }
+          value.to_f
+        when Integer, ->(type) { simple_type_match?(type, Integer) }
+          value.to_i
+        else
+          value
+        end
+      end
+      # Checks if a type is an enum type
+      sig { params(type: T.untyped).returns(T::Boolean) }
+      def enum_type?(type)
+        (type.is_a?(Class) && type < T::Enum) ||
+          (type.is_a?(T::Types::Simple) && type.raw_type < T::Enum)
+      end
+      # Extracts the enum class from a type
+      sig { params(prop_type: T.untyped).returns(T.class_of(T::Enum)) }
+      def extract_enum_class(prop_type)
+        if prop_type.is_a?(Class) && prop_type < T::Enum
+          prop_type
+        elsif prop_type.is_a?(T::Types::Simple) && prop_type.raw_type < T::Enum
+          prop_type.raw_type
+        else
+          T.cast(prop_type, T.class_of(T::Enum))
+        end
+      end
+      # Checks if a type matches a simple type (like Float, Integer)
+      sig { params(type: T.untyped, target_type: T.untyped).returns(T::Boolean) }
+      def simple_type_match?(type, target_type)
+        type.is_a?(T::Types::Simple) && type.raw_type == target_type
+      end
+    end
+  end
+end