dspy 0.21.0 → 0.22.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dspy/events/subscriber_mixin.rb +79 -0
- data/lib/dspy/events/subscribers.rb +43 -0
- data/lib/dspy/events/types.rb +218 -0
- data/lib/dspy/events.rb +83 -0
- data/lib/dspy/mixins/type_coercion.rb +21 -1
- data/lib/dspy/teleprompt/gepa.rb +637 -0
- data/lib/dspy/teleprompt/teleprompter.rb +1 -1
- data/lib/dspy/version.rb +1 -1
- data/lib/dspy.rb +93 -1
- metadata +9 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 35e148ea7f8b9d9239489008409167bce63fce8bbb51798837573a93cc82bd73
+  data.tar.gz: 69304272af26457e557189b743c59bcddb25f9d05ba485e5fec1e61cee5be4ad
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 998377fc4c8d444029e83e9b01f5e65efd28df06abc07a8b120258a91ef6894c6a0b75ffa398526c305894fe9b5a22eb389b0e9646a1f26d341af6aea736101b
+  data.tar.gz: 77e0ccd6a18fd3495bd785acfc0d45555f7632b895bb9dbe583e1b42622270f9fb9f9f242cac43f01259971829decb7e8306ad070a2fe9e2a4a9c655bbeb675b
data/lib/dspy/events/subscriber_mixin.rb
ADDED
@@ -0,0 +1,79 @@
# frozen_string_literal: true

require 'sorbet-runtime'

module DSPy
  module Events
    # Mixin for adding class-level event subscriptions
    # Provides a clean way to subscribe to events at the class level
    # instead of requiring instance-based subscriptions
    #
    # Usage:
    #   class MyTracker
    #     include DSPy::Events::SubscriberMixin
    #
    #     add_subscription('llm.*') do |name, attrs|
    #       # Handle LLM events globally for this class
    #     end
    #   end
    module SubscriberMixin
      extend T::Sig

      def self.included(base)
        base.extend(ClassMethods)
        base.class_eval do
          @event_subscriptions = []
          @subscription_mutex = Mutex.new

          # Initialize subscriptions when the class is first loaded
          @subscriptions_initialized = false
        end
      end

      module ClassMethods
        extend T::Sig

        # Add a class-level event subscription
        sig { params(pattern: String, block: T.proc.params(arg0: String, arg1: T::Hash[T.any(String, Symbol), T.untyped]).void).returns(String) }
        def add_subscription(pattern, &block)
          subscription_mutex.synchronize do
            subscription_id = DSPy.events.subscribe(pattern, &block)
            event_subscriptions << subscription_id
            subscription_id
          end
        end

        # Remove all subscriptions for this class
        sig { void }
        def unsubscribe_all
          subscription_mutex.synchronize do
            event_subscriptions.each { |id| DSPy.events.unsubscribe(id) }
            event_subscriptions.clear
          end
        end

        # Get list of active subscription IDs
        sig { returns(T::Array[String]) }
        def subscriptions
          subscription_mutex.synchronize do
            event_subscriptions.dup
          end
        end

        private

        # Thread-safe access to subscriptions array
        sig { returns(T::Array[String]) }
        def event_subscriptions
          @event_subscriptions ||= []
        end

        # Thread-safe access to mutex
        sig { returns(Mutex) }
        def subscription_mutex
          @subscription_mutex ||= Mutex.new
        end
      end
    end
  end
end
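A minimal consumer sketch for the mixin above. The TokenTracker class, event attribute keys, and the aggregation logic are illustrative, not part of the gem; only SubscriberMixin, add_subscription, and unsubscribe_all come from the diff.

    require 'dspy'

    # Hypothetical class-level tracker: every 'llm.*' event in the process is tallied here.
    class TokenTracker
      include DSPy::Events::SubscriberMixin

      @totals = Hash.new(0)

      class << self
        attr_reader :totals
      end

      add_subscription('llm.*') do |_name, attrs|
        model = attrs['gen_ai.request.model'] || 'unknown'
        @totals[model] += (attrs['gen_ai.usage.total_tokens'] || 0).to_i
      end
    end

    # Later, remove every subscription registered by this class:
    # TokenTracker.unsubscribe_all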
data/lib/dspy/events/subscribers.rb
ADDED
@@ -0,0 +1,43 @@
# frozen_string_literal: true

module DSPy
  module Events
    # Base subscriber class for event-driven patterns
    # This provides the foundation for creating custom event subscribers
    #
    # Example usage:
    #   class MySubscriber < DSPy::Events::BaseSubscriber
    #     def subscribe
    #       add_subscription('llm.*') do |event_name, attributes|
    #         # Handle LLM events
    #       end
    #     end
    #   end
    #
    #   subscriber = MySubscriber.new
    #   # subscriber will start receiving events
    #   subscriber.unsubscribe  # Clean up when done
    class BaseSubscriber
      def initialize
        @subscriptions = []
      end

      def subscribe
        raise NotImplementedError, "Subclasses must implement #subscribe"
      end

      def unsubscribe
        @subscriptions.each { |id| DSPy.events.unsubscribe(id) }
        @subscriptions.clear
      end

      protected

      def add_subscription(pattern, &block)
        subscription_id = DSPy.events.subscribe(pattern, &block)
        @subscriptions << subscription_id
        subscription_id
      end
    end
  end
end
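An instance-based subscriber built on BaseSubscriber might look like the sketch below. The ErrorLogger class and the '*.error' pattern are illustrative; note that #subscribe is called explicitly, since initialize only prepares the subscription list.

    require 'dspy'

    class ErrorLogger < DSPy::Events::BaseSubscriber
      def subscribe
        add_subscription('*.error') do |event_name, attributes|
          DSPy.logger&.warn("#{event_name}: #{attributes.inspect}")
        end
      end
    end

    logger = ErrorLogger.new
    logger.subscribe     # registers the pattern with DSPy.events
    # ... run DSPy programs ...
    logger.unsubscribe   # removes all of this instance's subscriptions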
data/lib/dspy/events/types.rb
ADDED
@@ -0,0 +1,218 @@
# frozen_string_literal: true

require 'sorbet-runtime'

module DSPy
  module Events
    # Base event structure using Sorbet T::Struct
    class Event < T::Struct
      const :name, String
      const :timestamp, Time
      const :attributes, T::Hash[T.any(String, Symbol), T.untyped], default: {}

      def initialize(name:, timestamp: Time.now, attributes: {})
        super(name: name, timestamp: timestamp, attributes: attributes)
      end

      def to_attributes
        result = attributes.dup
        result[:timestamp] = timestamp
        result
      end
    end

    # Token usage structure for LLM events
    class TokenUsage < T::Struct
      const :prompt_tokens, Integer
      const :completion_tokens, Integer

      def total_tokens
        prompt_tokens + completion_tokens
      end
    end

    # LLM operation events with semantic conventions
    class LLMEvent < T::Struct
      VALID_PROVIDERS = T.let(
        ['openai', 'anthropic', 'google', 'azure', 'ollama', 'together', 'groq', 'cohere'].freeze,
        T::Array[String]
      )

      # Common event fields
      const :name, String
      const :timestamp, Time

      # LLM-specific fields
      const :provider, String
      const :model, String
      const :usage, T.nilable(TokenUsage), default: nil
      const :duration_ms, T.nilable(Numeric), default: nil
      const :temperature, T.nilable(Float), default: nil
      const :max_tokens, T.nilable(Integer), default: nil
      const :stream, T.nilable(T::Boolean), default: nil

      def initialize(name:, provider:, model:, timestamp: Time.now, usage: nil, duration_ms: nil, temperature: nil, max_tokens: nil, stream: nil)
        unless VALID_PROVIDERS.include?(provider.downcase)
          raise ArgumentError, "Invalid provider '#{provider}'. Must be one of: #{VALID_PROVIDERS.join(', ')}"
        end
        super(
          name: name,
          timestamp: timestamp,
          provider: provider.downcase,
          model: model,
          usage: usage,
          duration_ms: duration_ms,
          temperature: temperature,
          max_tokens: max_tokens,
          stream: stream
        )
      end

      def to_otel_attributes
        attrs = {
          'gen_ai.system' => provider,
          'gen_ai.request.model' => model
        }

        if usage
          attrs['gen_ai.usage.prompt_tokens'] = usage.prompt_tokens
          attrs['gen_ai.usage.completion_tokens'] = usage.completion_tokens
          attrs['gen_ai.usage.total_tokens'] = usage.total_tokens
        end

        attrs['gen_ai.request.temperature'] = temperature if temperature
        attrs['gen_ai.request.max_tokens'] = max_tokens if max_tokens
        attrs['gen_ai.request.stream'] = stream if stream
        attrs['duration_ms'] = duration_ms if duration_ms

        attrs
      end

      def to_attributes
        result = to_otel_attributes.dup
        result[:timestamp] = timestamp
        result[:provider] = provider
        result[:model] = model
        result[:duration_ms] = duration_ms if duration_ms
        result
      end
    end

    # DSPy module execution events
    class ModuleEvent < T::Struct
      # Common event fields
      const :name, String
      const :timestamp, Time

      # Module-specific fields
      const :module_name, String
      const :signature_name, T.nilable(String), default: nil
      const :input_fields, T.nilable(T::Array[String]), default: nil
      const :output_fields, T.nilable(T::Array[String]), default: nil
      const :duration_ms, T.nilable(Numeric), default: nil
      const :success, T.nilable(T::Boolean), default: nil

      def initialize(name:, module_name:, timestamp: Time.now, signature_name: nil, input_fields: nil, output_fields: nil, duration_ms: nil, success: nil)
        super(
          name: name,
          timestamp: timestamp,
          module_name: module_name,
          signature_name: signature_name,
          input_fields: input_fields,
          output_fields: output_fields,
          duration_ms: duration_ms,
          success: success
        )
      end

      def to_attributes
        result = { timestamp: timestamp }
        result[:module_name] = module_name
        result[:signature_name] = signature_name if signature_name
        result[:input_fields] = input_fields if input_fields
        result[:output_fields] = output_fields if output_fields
        result[:duration_ms] = duration_ms if duration_ms
        result[:success] = success if success
        result
      end
    end

    # Optimization and training events
    class OptimizationEvent < T::Struct
      # Common event fields
      const :name, String
      const :timestamp, Time

      # Optimization-specific fields
      const :optimizer_name, String
      const :trial_number, T.nilable(Integer), default: nil
      const :score, T.nilable(Float), default: nil
      const :best_score, T.nilable(Float), default: nil
      const :parameters, T.nilable(T::Hash[T.any(String, Symbol), T.untyped]), default: nil
      const :duration_ms, T.nilable(Numeric), default: nil

      def initialize(name:, optimizer_name:, timestamp: Time.now, trial_number: nil, score: nil, best_score: nil, parameters: nil, duration_ms: nil)
        super(
          name: name,
          timestamp: timestamp,
          optimizer_name: optimizer_name,
          trial_number: trial_number,
          score: score,
          best_score: best_score,
          parameters: parameters,
          duration_ms: duration_ms
        )
      end

      def to_attributes
        result = { timestamp: timestamp }
        result[:optimizer_name] = optimizer_name
        result[:trial_number] = trial_number if trial_number
        result[:score] = score if score
        result[:best_score] = best_score if best_score
        result[:parameters] = parameters if parameters
        result[:duration_ms] = duration_ms if duration_ms
        result
      end
    end

    # Evaluation events
    class EvaluationEvent < T::Struct
      # Common event fields
      const :name, String
      const :timestamp, Time

      # Evaluation-specific fields
      const :evaluator_name, String
      const :metric_name, T.nilable(String), default: nil
      const :score, T.nilable(Float), default: nil
      const :total_examples, T.nilable(Integer), default: nil
      const :passed_examples, T.nilable(Integer), default: nil
      const :duration_ms, T.nilable(Numeric), default: nil

      def initialize(name:, evaluator_name:, timestamp: Time.now, metric_name: nil, score: nil, total_examples: nil, passed_examples: nil, duration_ms: nil)
        super(
          name: name,
          timestamp: timestamp,
          evaluator_name: evaluator_name,
          metric_name: metric_name,
          score: score,
          total_examples: total_examples,
          passed_examples: passed_examples,
          duration_ms: duration_ms
        )
      end

      def to_attributes
        result = { timestamp: timestamp }
        result[:evaluator_name] = evaluator_name
        result[:metric_name] = metric_name if metric_name
        result[:score] = score if score
        result[:total_examples] = total_examples if total_examples
        result[:passed_examples] = passed_examples if passed_examples
        result[:duration_ms] = duration_ms if duration_ms
        result
      end
    end
  end
end
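A short sketch of how the typed structs above map onto OpenTelemetry GenAI attributes. The event name and the numeric values are illustrative; the attribute keys are the ones produced by to_otel_attributes in the file above.

    require 'dspy'

    usage = DSPy::Events::TokenUsage.new(prompt_tokens: 120, completion_tokens: 45)

    event = DSPy::Events::LLMEvent.new(
      name: 'llm.generate',        # illustrative event name
      provider: 'openai',          # must be one of VALID_PROVIDERS
      model: 'gpt-4o-mini',
      usage: usage,
      duration_ms: 830,
      temperature: 0.2
    )

    event.to_otel_attributes
    # => { 'gen_ai.system' => 'openai',
    #      'gen_ai.request.model' => 'gpt-4o-mini',
    #      'gen_ai.usage.prompt_tokens' => 120,
    #      'gen_ai.usage.completion_tokens' => 45,
    #      'gen_ai.usage.total_tokens' => 165,
    #      'gen_ai.request.temperature' => 0.2,
    #      'duration_ms' => 830 }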
data/lib/dspy/events.rb
ADDED
@@ -0,0 +1,83 @@
# frozen_string_literal: true

require 'securerandom'

module DSPy
  # Events module to hold typed event structures
  module Events
    # Will be defined in events/types.rb
  end

  class EventRegistry
    def initialize
      @listeners = {}
      @subscription_counter = 0
      @mutex = Mutex.new
    end

    def subscribe(pattern, &block)
      return unless block_given?

      subscription_id = SecureRandom.uuid
      @mutex.synchronize do
        @listeners[subscription_id] = {
          pattern: pattern,
          block: block
        }
      end

      subscription_id
    end

    def unsubscribe(subscription_id)
      @mutex.synchronize do
        @listeners.delete(subscription_id)
      end
    end

    def clear_listeners
      @mutex.synchronize do
        @listeners.clear
      end
    end

    def notify(event_name, attributes)
      # Take a snapshot of current listeners to avoid holding the mutex during execution
      # This allows listeners to be modified while others are executing
      matching_listeners = @mutex.synchronize do
        @listeners.select do |id, listener|
          pattern_matches?(listener[:pattern], event_name)
        end.dup # Create a copy to avoid shared state
      end

      matching_listeners.each do |id, listener|
        begin
          listener[:block].call(event_name, attributes)
        rescue => e
          # Log the error but continue processing other listeners
          # Use emit_log directly to avoid infinite recursion
          DSPy.send(:emit_log, 'event.listener.error', {
            subscription_id: id,
            error_class: e.class.name,
            error_message: e.message,
            event_name: event_name
          })
        end
      end
    end

    private

    def pattern_matches?(pattern, event_name)
      if pattern.include?('*')
        # Convert wildcard pattern to regex
        # llm.* becomes ^llm\..*$
        regex_pattern = "^#{Regexp.escape(pattern).gsub('\\*', '.*')}$"
        Regexp.new(regex_pattern).match?(event_name)
      else
        # Exact match
        pattern == event_name
      end
    end
  end
end
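The registry's wildcard matching is a straightforward regex translation of the pattern. A sketch of using it directly (in practice the process-wide DSPy.events instance is used instead of constructing a registry by hand; the event names below are illustrative):

    require 'dspy'

    registry = DSPy::EventRegistry.new

    id = registry.subscribe('llm.*') do |name, attrs|
      puts "#{name}: #{attrs[:model]}"
    end

    registry.notify('llm.response', { model: 'gpt-4o-mini' })  # matches 'llm.*', block runs
    registry.notify('module.forward', {})                      # no match, block not called

    registry.unsubscribe(id)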
data/lib/dspy/mixins/type_coercion.rb
CHANGED
@@ -140,8 +140,28 @@ module DSPy
       # Convert string keys to symbols
       symbolized_hash = value.transform_keys(&:to_sym)

+      # Get struct properties to understand what fields are expected
+      struct_props = struct_class.props
+
+      # Remove the _type field that DSPy adds for discriminating structs,
+      # but only if it's NOT a legitimate field in the struct definition
+      if !struct_props.key?(:_type) && symbolized_hash.key?(:_type)
+        symbolized_hash = symbolized_hash.except(:_type)
+      end
+
+      # Recursively coerce nested struct fields
+      coerced_hash = symbolized_hash.map do |key, val|
+        prop_info = struct_props[key]
+        if prop_info && prop_info[:type]
+          coerced_value = coerce_value_to_type(val, prop_info[:type])
+          [key, coerced_value]
+        else
+          [key, val]
+        end
+      end.to_h
+
       # Create the struct instance
-      struct_class.new(**
+      struct_class.new(**coerced_hash)
     rescue ArgumentError => e
       # If struct creation fails, return the original value
       DSPy.logger.debug("Failed to coerce to struct #{struct_class}: #{e.message}")
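The added block recursively coerces each hash value into its declared prop type before the struct is instantiated, so nested struct fields no longer arrive as raw hashes. The enclosing method name is not visible in this hunk, so the call at the bottom of the sketch is against a hypothetical helper; the structs themselves are illustrative.

    require 'sorbet-runtime'

    class Address < T::Struct
      const :city, String
    end

    class Person < T::Struct
      const :name, String
      const :address, Address
    end

    # With the recursive coercion, the inner hash is first converted to an
    # Address via coerce_value_to_type before Person.new is called:
    #
    #   coerce_to_struct({ 'name' => 'Ada', 'address' => { 'city' => 'London' } }, Person)
    #   # hypothetical call; returns a Person whose #address is an Address, not a Hash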
data/lib/dspy/teleprompt/gepa.rb
ADDED
@@ -0,0 +1,637 @@
# frozen_string_literal: true

require 'sorbet-runtime'
require_relative 'teleprompter'

module DSPy
  module Teleprompt
    # GEPA: Genetic-Pareto Reflective Prompt Evolution optimizer
    # Uses natural language reflection to evolve prompts through genetic algorithms
    # and Pareto frontier selection for maintaining diverse high-performing candidates
    class GEPA < Teleprompter
      extend T::Sig

      # Immutable execution trace record using Ruby's Data class
      # Captures execution events for GEPA's reflective analysis
      class ExecutionTrace < Data.define(
        :trace_id,
        :event_name,
        :timestamp,
        :span_id,
        :attributes,
        :metadata
      )
        extend T::Sig

        # Type aliases for better type safety
        AttributesHash = T.type_alias { T::Hash[T.any(String, Symbol), T.untyped] }
        MetadataHash = T.type_alias { T::Hash[Symbol, T.untyped] }

        sig do
          params(
            trace_id: String,
            event_name: String,
            timestamp: Time,
            span_id: T.nilable(String),
            attributes: AttributesHash,
            metadata: T.nilable(MetadataHash)
          ).void
        end
        def initialize(trace_id:, event_name:, timestamp:, span_id: nil, attributes: {}, metadata: nil)
          # Freeze nested structures for true immutability
          frozen_attributes = attributes.freeze
          frozen_metadata = metadata&.freeze

          super(
            trace_id: trace_id,
            event_name: event_name,
            timestamp: timestamp,
            span_id: span_id,
            attributes: frozen_attributes,
            metadata: frozen_metadata
          )
        end

        # Check if this is an LLM-related trace
        sig { returns(T::Boolean) }
        def llm_trace?
          event_name.start_with?('llm.') || event_name.start_with?('lm.')
        end

        # Check if this is a module-related trace
        sig { returns(T::Boolean) }
        def module_trace?
          !llm_trace? && (
            event_name.include?('chain_of_thought') ||
            event_name.include?('react') ||
            event_name.include?('codeact') ||
            event_name.include?('predict')
          )
        end

        # Extract token usage from LLM traces
        sig { returns(Integer) }
        def token_usage
          return 0 unless llm_trace?

          # Try different token attribute keys
          [
            'gen_ai.usage.total_tokens',
            'gen_ai.usage.prompt_tokens',
            'tokens',
            :tokens
          ].each do |key|
            value = attributes[key]
            return value.to_i if value
          end

          0
        end

        # Convert to hash representation
        sig { returns(T::Hash[Symbol, T.untyped]) }
        def to_h
          {
            trace_id: trace_id,
            event_name: event_name,
            timestamp: timestamp,
            span_id: span_id,
            attributes: attributes,
            metadata: metadata
          }
        end

        # Extract prompt text from trace
        sig { returns(T.nilable(String)) }
        def prompt_text
          attributes[:prompt] || attributes['prompt']
        end

        # Extract response text from trace
        sig { returns(T.nilable(String)) }
        def response_text
          attributes[:response] || attributes['response']
        end

        # Get the model used in this trace
        sig { returns(T.nilable(String)) }
        def model_name
          attributes['gen_ai.request.model'] || attributes[:model]
        end

        # Get the signature class name
        sig { returns(T.nilable(String)) }
        def signature_name
          attributes['dspy.signature'] || attributes[:signature]
        end
      end

      # Immutable reflection analysis result using Ruby's Data class
      # Stores the output of GEPA's reflective analysis on execution traces
      class ReflectionResult < Data.define(
        :trace_id,
        :diagnosis,
        :improvements,
        :confidence,
        :reasoning,
        :suggested_mutations,
        :metadata
      )
        extend T::Sig

        # Type aliases for better type safety
        ImprovementsList = T.type_alias { T::Array[String] }
        MutationsList = T.type_alias { T::Array[Symbol] }
        MetadataHash = T.type_alias { T::Hash[Symbol, T.untyped] }

        sig do
          params(
            trace_id: String,
            diagnosis: String,
            improvements: ImprovementsList,
            confidence: Float,
            reasoning: String,
            suggested_mutations: MutationsList,
            metadata: MetadataHash
          ).void
        end
        def initialize(trace_id:, diagnosis:, improvements:, confidence:, reasoning:, suggested_mutations:, metadata:)
          # Validate confidence score
          if confidence < 0.0 || confidence > 1.0
            raise ArgumentError, "confidence must be between 0 and 1, got #{confidence}"
          end

          # Freeze nested structures for true immutability
          frozen_improvements = improvements.freeze
          frozen_mutations = suggested_mutations.freeze
          frozen_metadata = metadata.freeze

          super(
            trace_id: trace_id,
            diagnosis: diagnosis,
            improvements: frozen_improvements,
            confidence: confidence,
            reasoning: reasoning,
            suggested_mutations: frozen_mutations,
            metadata: frozen_metadata
          )
        end

        # Check if this reflection has high confidence (>= 0.8)
        sig { returns(T::Boolean) }
        def high_confidence?
          confidence >= 0.8
        end

        # Check if this reflection suggests actionable changes
        sig { returns(T::Boolean) }
        def actionable?
          improvements.any? || suggested_mutations.any?
        end

        # Get mutations sorted by priority (simple alphabetical for Phase 1)
        sig { returns(MutationsList) }
        def mutation_priority
          suggested_mutations.sort
        end

        # Convert to hash representation
        sig { returns(T::Hash[Symbol, T.untyped]) }
        def to_h
          {
            trace_id: trace_id,
            diagnosis: diagnosis,
            improvements: improvements,
            confidence: confidence,
            reasoning: reasoning,
            suggested_mutations: suggested_mutations,
            metadata: metadata
          }
        end

        # Generate a concise summary of this reflection
        sig { returns(String) }
        def summary
          confidence_pct = (confidence * 100).round
          mutation_list = suggested_mutations.map(&:to_s).join(', ')

          "#{diagnosis.split('.').first}. " \
            "Confidence: #{confidence_pct}%. " \
            "#{improvements.size} improvements suggested. " \
            "Mutations: #{mutation_list}."
        end

        # Check if reflection model was used
        sig { returns(T.nilable(String)) }
        def reflection_model
          metadata[:reflection_model]
        end

        # Get token usage from reflection analysis
        sig { returns(Integer) }
        def token_usage
          metadata[:token_usage] || 0
        end

        # Get analysis duration in milliseconds
        sig { returns(Integer) }
        def analysis_duration_ms
          metadata[:analysis_duration_ms] || 0
        end
      end

      # TraceCollector aggregates execution traces from DSPy events
      # Uses SubscriberMixin for class-level event subscriptions
      class TraceCollector
        include DSPy::Events::SubscriberMixin
        extend T::Sig

        sig { void }
        def initialize
          @traces = T.let([], T::Array[ExecutionTrace])
          @traces_mutex = T.let(Mutex.new, Mutex)
          setup_subscriptions
        end

        sig { returns(T::Array[ExecutionTrace]) }
        attr_reader :traces

        # Get count of collected traces
        sig { returns(Integer) }
        def collected_count
          @traces_mutex.synchronize { @traces.size }
        end

        # Collect trace from event data
        sig { params(event_name: String, event_data: T::Hash[T.any(String, Symbol), T.untyped]).void }
        def collect_trace(event_name, event_data)
          @traces_mutex.synchronize do
            trace_id = event_data['trace_id'] || event_data[:trace_id] || generate_trace_id

            # Avoid duplicates
            return if @traces.any? { |t| t.trace_id == trace_id }

            timestamp = event_data['timestamp'] || event_data[:timestamp] || Time.now
            span_id = event_data['span_id'] || event_data[:span_id]
            attributes = event_data['attributes'] || event_data[:attributes] || {}
            metadata = event_data['metadata'] || event_data[:metadata] || {}

            trace = ExecutionTrace.new(
              trace_id: trace_id,
              event_name: event_name,
              timestamp: timestamp,
              span_id: span_id,
              attributes: attributes,
              metadata: metadata
            )

            @traces << trace
          end
        end

        # Get traces for a specific optimization run
        sig { params(run_id: String).returns(T::Array[ExecutionTrace]) }
        def traces_for_run(run_id)
          @traces_mutex.synchronize do
            @traces.select do |trace|
              metadata = trace.metadata
              metadata && metadata[:optimization_run_id] == run_id
            end
          end
        end

        # Get only LLM traces
        sig { returns(T::Array[ExecutionTrace]) }
        def llm_traces
          @traces_mutex.synchronize { @traces.select(&:llm_trace?) }
        end

        # Get only module traces
        sig { returns(T::Array[ExecutionTrace]) }
        def module_traces
          @traces_mutex.synchronize { @traces.select(&:module_trace?) }
        end

        # Clear all collected traces
        sig { void }
        def clear
          @traces_mutex.synchronize { @traces.clear }
        end

        private

        # Set up event subscriptions using SubscriberMixin
        sig { void }
        def setup_subscriptions
          # Subscribe to LLM events
          self.class.add_subscription('llm.*') do |name, attrs|
            collect_trace(name, attrs)
          end

          # Subscribe to module events
          self.class.add_subscription('*.reasoning_complete') do |name, attrs|
            collect_trace(name, attrs)
          end

          self.class.add_subscription('*.predict_complete') do |name, attrs|
            collect_trace(name, attrs)
          end
        end

        # Generate unique trace ID
        sig { returns(String) }
        def generate_trace_id
          "gepa-trace-#{SecureRandom.hex(4)}"
        end
      end

      # ReflectionEngine performs natural language reflection on execution traces
      # This is the core component that analyzes traces and generates improvement insights
      class ReflectionEngine
        extend T::Sig

        sig { returns(GEPAConfig) }
        attr_reader :config

        sig { params(config: T.nilable(GEPAConfig)).void }
        def initialize(config = nil)
          @config = config || GEPAConfig.new
        end

        # Perform reflective analysis on execution traces
        sig { params(traces: T::Array[ExecutionTrace]).returns(ReflectionResult) }
        def reflect_on_traces(traces)
          reflection_id = generate_reflection_id

          if traces.empty?
            return ReflectionResult.new(
              trace_id: reflection_id,
              diagnosis: 'No traces available for analysis',
              improvements: [],
              confidence: 0.0,
              reasoning: 'Cannot provide reflection without execution traces',
              suggested_mutations: [],
              metadata: {
                reflection_model: @config.reflection_lm,
                analysis_timestamp: Time.now,
                trace_count: 0
              }
            )
          end

          patterns = analyze_execution_patterns(traces)
          improvements = generate_improvement_suggestions(patterns)
          mutations = suggest_mutations(patterns)

          # For Phase 1, we generate a simple rule-based analysis
          # Future phases will use LLM-based reflection
          diagnosis = generate_diagnosis(patterns)
          reasoning = generate_reasoning(patterns, traces)
          confidence = calculate_confidence(patterns)

          ReflectionResult.new(
            trace_id: reflection_id,
            diagnosis: diagnosis,
            improvements: improvements,
            confidence: confidence,
            reasoning: reasoning,
            suggested_mutations: mutations,
            metadata: {
              reflection_model: @config.reflection_lm,
              analysis_timestamp: Time.now,
              trace_count: traces.size,
              token_usage: 0 # Phase 1 doesn't use actual LLM reflection
            }
          )
        end

        # Analyze patterns in execution traces
        sig { params(traces: T::Array[ExecutionTrace]).returns(T::Hash[Symbol, T.untyped]) }
        def analyze_execution_patterns(traces)
          llm_traces = traces.select(&:llm_trace?)
          module_traces = traces.select(&:module_trace?)

          total_tokens = llm_traces.sum(&:token_usage)
          unique_models = llm_traces.map(&:model_name).compact.uniq

          {
            llm_traces_count: llm_traces.size,
            module_traces_count: module_traces.size,
            total_tokens: total_tokens,
            unique_models: unique_models,
            avg_response_length: calculate_avg_response_length(llm_traces),
            trace_timespan: calculate_timespan(traces)
          }
        end

        # Generate improvement suggestions based on patterns
        sig { params(patterns: T::Hash[Symbol, T.untyped]).returns(T::Array[String]) }
        def generate_improvement_suggestions(patterns)
          suggestions = []

          if patterns[:total_tokens] > 500
            suggestions << 'Consider reducing prompt length to lower token usage'
          end

          if patterns[:avg_response_length] < 10
            suggestions << 'Responses seem brief - consider asking for more detailed explanations'
          end

          if patterns[:llm_traces_count] > patterns[:module_traces_count] * 3
            suggestions << 'High LLM usage detected - consider optimizing reasoning chains'
          end

          if patterns[:unique_models].size > 1
            suggestions << 'Multiple models used - consider standardizing on one model for consistency'
          end

          suggestions << 'Add step-by-step reasoning instructions' if suggestions.empty?
          suggestions
        end

        # Suggest mutation operations based on patterns
        sig { params(patterns: T::Hash[Symbol, T.untyped]).returns(T::Array[Symbol]) }
        def suggest_mutations(patterns)
          mutations = []

          avg_length = patterns[:avg_response_length] || 0
          total_tokens = patterns[:total_tokens] || 0
          llm_count = patterns[:llm_traces_count] || 0

          mutations << :expand if avg_length < 15
          mutations << :simplify if total_tokens > 300
          mutations << :combine if llm_count > 2
          mutations << :rewrite if llm_count == 1
          mutations << :rephrase if mutations.empty?

          mutations.uniq
        end

        private

        # Generate unique reflection ID
        sig { returns(String) }
        def generate_reflection_id
          "reflection-#{SecureRandom.hex(4)}"
        end

        # Generate diagnosis text
        sig { params(patterns: T::Hash[Symbol, T.untyped]).returns(String) }
        def generate_diagnosis(patterns)
          if patterns[:total_tokens] > 400
            'High token usage indicates potential inefficiency in prompt design'
          elsif patterns[:llm_traces_count] == 0
            'No LLM interactions found - execution may not be working as expected'
          elsif patterns[:avg_response_length] < 10
            'Responses are unusually brief which may indicate prompt clarity issues'
          else
            'Execution patterns appear normal with room for optimization'
          end
        end

        # Generate reasoning text
        sig { params(patterns: T::Hash[Symbol, T.untyped], traces: T::Array[ExecutionTrace]).returns(String) }
        def generate_reasoning(patterns, traces)
          reasoning_parts = []

          reasoning_parts << "Analyzed #{traces.size} execution traces"
          reasoning_parts << "#{patterns[:llm_traces_count]} LLM interactions"
          reasoning_parts << "#{patterns[:module_traces_count]} module operations"
          reasoning_parts << "Total token usage: #{patterns[:total_tokens]}"

          reasoning_parts.join('. ') + '.'
        end

        # Calculate confidence based on patterns
        sig { params(patterns: T::Hash[Symbol, T.untyped]).returns(Float) }
        def calculate_confidence(patterns)
          base_confidence = 0.7

          # More traces = higher confidence
          trace_bonus = [patterns[:llm_traces_count] + patterns[:module_traces_count], 10].min * 0.02

          # Reasonable token usage = higher confidence
          token_penalty = patterns[:total_tokens] > 1000 ? -0.1 : 0.0

          [(base_confidence + trace_bonus + token_penalty), 1.0].min
        end

        # Calculate average response length from LLM traces
        sig { params(llm_traces: T::Array[ExecutionTrace]).returns(Integer) }
        def calculate_avg_response_length(llm_traces)
          return 0 if llm_traces.empty?

          total_length = llm_traces.sum do |trace|
            response = trace.response_text
            response ? response.length : 0
          end

          total_length / llm_traces.size
        end

        # Calculate timespan of traces
        sig { params(traces: T::Array[ExecutionTrace]).returns(Float) }
        def calculate_timespan(traces)
          return 0.0 if traces.size < 2

          timestamps = traces.map(&:timestamp).sort
          (timestamps.last - timestamps.first).to_f
        end
      end

      # Configuration for GEPA optimization
      class GEPAConfig < Config
        extend T::Sig

        sig { returns(String) }
        attr_accessor :reflection_lm

        sig { returns(Integer) }
        attr_accessor :num_generations

        sig { returns(Integer) }
        attr_accessor :population_size

        sig { returns(Float) }
        attr_accessor :mutation_rate

        sig { returns(T::Boolean) }
        attr_accessor :use_pareto_selection

        sig { void }
        def initialize
          super
          @reflection_lm = 'gpt-4o'
          @num_generations = 10
          @population_size = 8
          @mutation_rate = 0.7
          @use_pareto_selection = true
        end

        sig { returns(T::Hash[Symbol, T.untyped]) }
        def to_h
          super.merge({
            reflection_lm: @reflection_lm,
            num_generations: @num_generations,
            population_size: @population_size,
            mutation_rate: @mutation_rate,
            use_pareto_selection: @use_pareto_selection
          })
        end
      end

      sig { returns(GEPAConfig) }
      attr_reader :config

      sig do
        params(
          metric: T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T.untyped)),
          config: T.nilable(GEPAConfig)
        ).void
      end
      def initialize(metric: nil, config: nil)
        @config = config || GEPAConfig.new
        super(metric: metric, config: @config)
      end

      # Main optimization method
      sig do
        params(
          program: T.untyped,
          trainset: T::Array[T.untyped],
          valset: T.nilable(T::Array[T.untyped])
        ).returns(OptimizationResult)
      end
      def compile(program, trainset:, valset: nil)
        validate_inputs(program, trainset, valset)

        instrument_step('gepa_compile', {
          trainset_size: trainset.size,
          valset_size: valset&.size || 0,
          num_generations: @config.num_generations,
          population_size: @config.population_size
        }) do
          # For Phase 1, return a basic optimization result
          # Future phases will implement the full genetic algorithm

          OptimizationResult.new(
            optimized_program: program,
            scores: { gepa_score: 0.0 },
            history: {
              num_generations: @config.num_generations,
              population_size: @config.population_size,
              phase: 'Phase 1 - Basic Structure'
            },
            best_score_name: 'gepa_score',
            best_score_value: 0.0,
            metadata: {
              optimizer: 'GEPA',
              reflection_lm: @config.reflection_lm,
              implementation_status: 'Phase 1 - Infrastructure Complete'
            }
          )
        end
      end
    end
  end
end
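A sketch of driving the Phase 1 GEPA pieces directly. The program, trainset, and metric are placeholders, and compile currently returns the Phase 1 stub result noted in the code above, so the compile call is shown commented out.

    require 'dspy'

    config = DSPy::Teleprompt::GEPA::GEPAConfig.new
    config.num_generations = 5
    config.population_size = 4

    collector = DSPy::Teleprompt::GEPA::TraceCollector.new  # subscribes to llm.* and *_complete events
    # ... run DSPy programs here so traces accumulate ...

    engine = DSPy::Teleprompt::GEPA::ReflectionEngine.new(config)
    result = engine.reflect_on_traces(collector.traces)
    puts result.summary          # diagnosis, confidence, suggested mutations

    gepa = DSPy::Teleprompt::GEPA.new(metric: ->(_example, _prediction) { 0.0 }, config: config)
    # optimization = gepa.compile(program, trainset: trainset, valset: valset)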
data/lib/dspy/teleprompt/teleprompter.rb
CHANGED
@@ -316,7 +316,7 @@ module DSPy
        operation: "optimization.#{step_name}",
        'dspy.module' => 'Teleprompter',
        'teleprompter.class' => self.class.name,
-       'teleprompter.config' => @config.to_h,
+       'teleprompter.config' => @config.to_h.to_json,
        **payload
      ) do
        yield
data/lib/dspy/version.rb
CHANGED
data/lib/dspy.rb
CHANGED
@@ -9,6 +9,8 @@ require_relative 'dspy/errors'
 require_relative 'dspy/type_serializer'
 require_relative 'dspy/observability'
 require_relative 'dspy/context'
+require_relative 'dspy/events'
+require_relative 'dspy/events/types'

 module DSPy
   extend Dry::Configurable
@@ -34,18 +36,105 @@ module DSPy
   end

   def self.log(event, **attributes)
+    # Return nil early if logger is not configured (backward compatibility)
+    return nil unless logger
+
+    # Forward to event system - this maintains backward compatibility
+    # while providing all new event system benefits
+    event(event, attributes)
+
+    # Return nil to maintain backward compatibility
+    nil
+  end
+
+  def self.event(event_name_or_object, attributes = {})
+    # Handle typed event objects
+    if event_name_or_object.respond_to?(:name) && event_name_or_object.respond_to?(:to_attributes)
+      event_obj = event_name_or_object
+      event_name = event_obj.name
+      attributes = event_obj.to_attributes
+
+      # For LLM events, use OpenTelemetry semantic conventions for spans
+      if event_obj.is_a?(DSPy::Events::LLMEvent)
+        otel_attributes = event_obj.to_otel_attributes
+        create_event_span(event_name, otel_attributes)
+      else
+        create_event_span(event_name, attributes)
+      end
+    else
+      # Handle string event names (backward compatibility)
+      event_name = event_name_or_object
+      raise ArgumentError, "Event name cannot be nil" if event_name.nil?
+
+      # Handle nil attributes
+      attributes = {} if attributes.nil?
+
+      # Create OpenTelemetry span for the event if observability is enabled
+      create_event_span(event_name, attributes)
+    end
+
+    # Perform the actual logging (original DSPy.log behavior)
+    emit_log(event_name, attributes)
+
+    # Notify event listeners
+    events.notify(event_name, attributes)
+  end
+
+  def self.events
+    @event_registry ||= DSPy::EventRegistry.new
+  end
+
+  private
+
+  def self.emit_log(event_name, attributes)
     return unless logger

     # Merge context automatically (but don't include span_stack)
     context = Context.current.dup
     context.delete(:span_stack)
     attributes = context.merge(attributes)
-    attributes[:event] =
+    attributes[:event] = event_name

     # Use Dry::Logger's structured logging
     logger.info(attributes)
   end

+  def self.create_event_span(event_name, attributes)
+    return unless DSPy::Observability.enabled?
+
+    begin
+      # Flatten nested hashes for OpenTelemetry span attributes
+      flattened_attributes = flatten_attributes(attributes)
+
+      # Create and immediately finish a span for this event
+      # Events are instant moments in time, not ongoing operations
+      span = DSPy::Observability.start_span(event_name, flattened_attributes)
+      DSPy::Observability.finish_span(span) if span
+    rescue => e
+      # Log error but don't let it break the event system
+      # Use emit_log directly to avoid infinite recursion
+      emit_log('event.span_creation_error', {
+        error_class: e.class.name,
+        error_message: e.message,
+        event_name: event_name
+      })
+    end
+  end
+
+  def self.flatten_attributes(attributes, parent_key = '', result = {})
+    attributes.each do |key, value|
+      new_key = parent_key.empty? ? key.to_s : "#{parent_key}.#{key}"
+
+      if value.is_a?(Hash)
+        flatten_attributes(value, new_key, result)
+      else
+        result[new_key] = value
+      end
+    end
+
+    result
+  end
+
   def self.create_logger
     env = ENV['RACK_ENV'] || ENV['RAILS_ENV'] || 'development'
     log_output = ENV['DSPY_LOG'] # Allow override
@@ -101,6 +190,8 @@ require_relative 'dspy/image'
 require_relative 'dspy/strategy'
 require_relative 'dspy/prediction'
 require_relative 'dspy/predict'
+require_relative 'dspy/events/subscribers'
+require_relative 'dspy/events/subscriber_mixin'
 require_relative 'dspy/chain_of_thought'
 require_relative 'dspy/re_act'
 require_relative 'dspy/code_act'
@@ -111,6 +202,7 @@ require_relative 'dspy/teleprompt/data_handler'
 require_relative 'dspy/propose/grounded_proposer'
 require_relative 'dspy/teleprompt/simple_optimizer'
 require_relative 'dspy/teleprompt/mipro_v2'
+require_relative 'dspy/teleprompt/gepa'
 require_relative 'dspy/tools'
 require_relative 'dspy/memory'
 require_relative 'dspy/storage/program_storage'
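A sketch of the new public surface added to the top-level DSPy module. The event names and attribute values are illustrative; DSPy.event, DSPy.events.subscribe, and DSPy.events.unsubscribe are the methods introduced in the hunk above.

    require 'dspy'

    # String events keep working the way DSPy.log did before:
    DSPy.event('optimization.trial_complete', { trial: 3, score: 0.82 })

    # Typed events route through OpenTelemetry GenAI semantic conventions:
    DSPy.event(DSPy::Events::LLMEvent.new(
      name: 'llm.generate',            # illustrative event name
      provider: 'anthropic',
      model: 'claude-3-5-sonnet'       # illustrative model id
    ))

    # Listeners receive every event matching their pattern:
    subscription = DSPy.events.subscribe('optimization.*') do |name, attrs|
      puts "#{name} -> #{attrs[:score]}"
    end
    DSPy.events.unsubscribe(subscription)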
metadata
CHANGED
@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: dspy
 version: !ruby/object:Gem::Version
-  version: 0.21.0
+  version: 0.22.1
 platform: ruby
 authors:
 - Vicente Reig Rincón de Arellano
 bindir: bin
 cert_chain: []
-date: 2025-09-
+date: 2025-09-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dry-configurable
@@ -177,7 +177,8 @@ dependencies:
   - - "~>"
     - !ruby/object:Gem::Version
       version: '0.30'
-description: The Ruby framework for programming with large language models.
+description: The Ruby framework for programming with large language models. Includes
+  event-driven observability system with OpenTelemetry integration and Langfuse export.
 email:
 - hey@vicente.services
 executables: []
@@ -192,6 +193,10 @@ files:
 - lib/dspy/error_formatter.rb
 - lib/dspy/errors.rb
 - lib/dspy/evaluate.rb
+- lib/dspy/events.rb
+- lib/dspy/events/subscriber_mixin.rb
+- lib/dspy/events/subscribers.rb
+- lib/dspy/events/types.rb
 - lib/dspy/example.rb
 - lib/dspy/few_shot_example.rb
 - lib/dspy/field.rb
@@ -244,6 +249,7 @@ files:
 - lib/dspy/storage/storage_manager.rb
 - lib/dspy/strategy.rb
 - lib/dspy/teleprompt/data_handler.rb
+- lib/dspy/teleprompt/gepa.rb
 - lib/dspy/teleprompt/mipro_v2.rb
 - lib/dspy/teleprompt/simple_optimizer.rb
 - lib/dspy/teleprompt/teleprompter.rb