RubyGems - dspy - Versions diffs - 0.3.1 → 0.5.0 - Mend

dspy 0.3.1 → 0.5.0

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

checksums.yaml +4 -4
data/README.md +67 -385
data/lib/dspy/chain_of_thought.rb +123 -86
data/lib/dspy/evaluate.rb +554 -0
data/lib/dspy/example.rb +203 -0
data/lib/dspy/few_shot_example.rb +81 -0
data/lib/dspy/instrumentation/token_tracker.rb +6 -6
data/lib/dspy/instrumentation.rb +199 -18
data/lib/dspy/lm/adapter_factory.rb +6 -8
data/lib/dspy/lm.rb +79 -35
data/lib/dspy/mixins/instrumentation_helpers.rb +133 -0
data/lib/dspy/mixins/struct_builder.rb +133 -0
data/lib/dspy/mixins/type_coercion.rb +67 -0
data/lib/dspy/predict.rb +83 -128
data/lib/dspy/prompt.rb +222 -0
data/lib/dspy/propose/grounded_proposer.rb +560 -0
data/lib/dspy/re_act.rb +242 -173
data/lib/dspy/registry/registry_manager.rb +504 -0
data/lib/dspy/registry/signature_registry.rb +725 -0
data/lib/dspy/storage/program_storage.rb +442 -0
data/lib/dspy/storage/storage_manager.rb +331 -0
data/lib/dspy/subscribers/langfuse_subscriber.rb +669 -0
data/lib/dspy/subscribers/logger_subscriber.rb +180 -5
data/lib/dspy/subscribers/newrelic_subscriber.rb +686 -0
data/lib/dspy/subscribers/otel_subscriber.rb +538 -0
data/lib/dspy/teleprompt/data_handler.rb +107 -0
data/lib/dspy/teleprompt/mipro_v2.rb +790 -0
data/lib/dspy/teleprompt/simple_optimizer.rb +497 -0
data/lib/dspy/teleprompt/teleprompter.rb +336 -0
data/lib/dspy/teleprompt/utils.rb +380 -0
data/lib/dspy/version.rb +5 -0
data/lib/dspy.rb +105 -0
metadata +32 -12
data/lib/dspy/lm/adapters/ruby_llm_adapter.rb +0 -81

data/lib/dspy/predict.rb CHANGED

@@ -3,6 +3,10 @@
 require 'sorbet-runtime'
 require_relative 'module'
 require_relative 'instrumentation'
+require_relative 'prompt'
+require_relative 'mixins/struct_builder'
+require_relative 'mixins/type_coercion'
+require_relative 'mixins/instrumentation_helpers'
 module DSPy
   # Exception raised when prediction fails validation
@@ -21,56 +25,56 @@ module DSPy
   class Predict < DSPy::Module
     extend T::Sig
+    include Mixins::StructBuilder
+    include Mixins::TypeCoercion
+    include Mixins::InstrumentationHelpers
     sig { returns(T.class_of(Signature)) }
     attr_reader :signature_class
+    sig { returns(Prompt) }
+    attr_reader :prompt
     sig { params(signature_class: T.class_of(Signature)).void }
     def initialize(signature_class)
       super()
       @signature_class = signature_class
+      @prompt = Prompt.from_signature(signature_class)
     end
+    # Backward compatibility methods - delegate to prompt object
     sig { returns(String) }
     def system_signature
-      <<-PROMPT
-      Your input schema fields are:
-        ```json
-         #{JSON.generate(@signature_class.input_json_schema)}
-        ```
-      Your output schema fields are:
-        ```json
-          #{JSON.generate(@signature_class.output_json_schema)}
-        ````
-      All interactions will be structured in the following way, with the appropriate values filled in.
-      ## Input values
-        ```json
-         {input_values}
-        ```
-      ## Output values
-      Respond exclusively with the output schema fields in the json block below.
-        ```json
-          {output_values}
-        ```
-      In adhering to this structure, your objective is: #{@signature_class.description}
-      PROMPT
+      @prompt.render_system_prompt
     end
     sig { params(input_values: T::Hash[Symbol, T.untyped]).returns(String) }
     def user_signature(input_values)
-      <<-PROMPT
-        ## Input Values
-        ```json
-        #{JSON.generate(input_values)}
-        ```
-        Respond with the corresponding output schema fields wrapped in a ```json ``` block,
-         starting with the heading `## Output values`.
-      PROMPT
+      @prompt.render_user_prompt(input_values)
+    end
+    # New prompt-based interface for optimization
+    sig { params(new_prompt: Prompt).returns(Predict) }
+    def with_prompt(new_prompt)
+      # Create a new instance with the same signature but updated prompt
+      instance = self.class.new(@signature_class)
+      instance.instance_variable_set(:@prompt, new_prompt)
+      instance
+    end
+    sig { params(instruction: String).returns(Predict) }
+    def with_instruction(instruction)
+      with_prompt(@prompt.with_instruction(instruction))
+    end
+    sig { params(examples: T::Array[FewShotExample]).returns(Predict) }
+    def with_examples(examples)
+      with_prompt(@prompt.with_examples(examples))
+    end
+    sig { params(examples: T::Array[FewShotExample]).returns(Predict) }
+    def add_examples(examples)
+      with_prompt(@prompt.add_examples(examples))
     end
     sig { override.params(kwargs: T.untyped).returns(T.type_parameter(:O)) }
@@ -81,112 +85,63 @@ module DSPy
     sig { params(input_values: T.untyped).returns(T.untyped) }
     def forward_untyped(**input_values)
-      # Prepare instrumentation payload
-      input_fields = input_values.keys.map(&:to_s)
-      Instrumentation.instrument('dspy.predict', {
-        signature_class: @signature_class.name,
-        model: lm.model,
-        provider: lm.provider,
-        input_fields: input_fields
-      }) do
+      instrument_prediction('dspy.predict', @signature_class, input_values) do
         # Validate input
-        begin
-          _input_struct = @signature_class.input_struct_class.new(**input_values)
-        rescue ArgumentError => e
-          # Emit validation error event
-          Instrumentation.emit('dspy.predict.validation_error', {
-            signature_class: @signature_class.name,
-            validation_type: 'input',
-            validation_errors: { input: e.message }
-          })
-          raise PredictionInvalidError.new({ input: e.message })
-        end
-        # Call LM
+        validate_input_struct(input_values)
+        # Call LM and process response
         output_attributes = lm.chat(self, input_values)
-        output_attributes = output_attributes.transform_keys(&:to_sym)
-        output_props = @signature_class.output_struct_class.props
-        output_attributes = output_attributes.map do |key, value|
-          prop_type = output_props[key][:type] if output_props[key]
-          if prop_type
-            # Check if it's an enum (can be raw Class or T::Types::Simple)
-            enum_class = if prop_type.is_a?(Class) && prop_type < T::Enum
-                           prop_type
-                         elsif prop_type.is_a?(T::Types::Simple) && prop_type.raw_type < T::Enum
-                           prop_type.raw_type
-                         end
-            if enum_class
-              [key, enum_class.deserialize(value)]
-            elsif prop_type == Float || (prop_type.is_a?(T::Types::Simple) && prop_type.raw_type == Float)
-              [key, value.to_f]
-            elsif prop_type == Integer || (prop_type.is_a?(T::Types::Simple) && prop_type.raw_type == Integer)
-              [key, value.to_i]
-            else
-              [key, value]
-            end
-          else
-            [key, value]
-          end
-        end.to_h
-        # Create combined struct with both input and output values
-        begin
-          combined_struct = create_combined_struct_class
-          all_attributes = input_values.merge(output_attributes)
-          combined_struct.new(**all_attributes)
-        rescue ArgumentError => e
-          raise PredictionInvalidError.new({ output: e.message })
-        rescue TypeError => e
-          raise PredictionInvalidError.new({ output: e.message })
-        end
+        processed_output = process_lm_output(output_attributes)
+        # Create combined result struct
+        create_prediction_result(input_values, processed_output)
       end
     end
     private
+    # Validates input using signature struct
+    sig { params(input_values: T::Hash[Symbol, T.untyped]).void }
+    def validate_input_struct(input_values)
+      @signature_class.input_struct_class.new(**input_values)
+    rescue ArgumentError => e
+      emit_validation_error(@signature_class, 'input', e.message)
+      raise PredictionInvalidError.new({ input: e.message })
+    end
+    # Processes LM output with type coercion
+    sig { params(output_attributes: T::Hash[T.untyped, T.untyped]).returns(T::Hash[Symbol, T.untyped]) }
+    def process_lm_output(output_attributes)
+      output_attributes = output_attributes.transform_keys(&:to_sym)
+      output_props = @signature_class.output_struct_class.props
+      coerce_output_attributes(output_attributes, output_props)
+    end
+    # Creates the final prediction result struct
+    sig { params(input_values: T::Hash[Symbol, T.untyped], output_attributes: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
+    def create_prediction_result(input_values, output_attributes)
+      begin
+        combined_struct = create_combined_struct_class
+        all_attributes = input_values.merge(output_attributes)
+        combined_struct.new(**all_attributes)
+      rescue ArgumentError => e
+        raise PredictionInvalidError.new({ output: e.message })
+      rescue TypeError => e
+        raise PredictionInvalidError.new({ output: e.message })
+      end
+    end
+    # Creates a combined struct class with input and output properties
     sig { returns(T.class_of(T::Struct)) }
     def create_combined_struct_class
       input_props = @signature_class.input_struct_class.props
       output_props = @signature_class.output_struct_class.props
-      # Create a new struct class that combines input and output fields
-      Class.new(T::Struct) do
-        extend T::Sig
-        # Add input fields
-        input_props.each do |name, prop_info|
-          if prop_info[:rules]&.any? { |rule| rule.is_a?(T::Props::NilableRules) }
-            prop name, prop_info[:type], default: prop_info[:default]
-          else
-            const name, prop_info[:type], default: prop_info[:default]
-          end
-        end
-        # Add output fields
-        output_props.each do |name, prop_info|
-          if prop_info[:rules]&.any? { |rule| rule.is_a?(T::Props::NilableRules) }
-            prop name, prop_info[:type], default: prop_info[:default]
-          else
-            const name, prop_info[:type], default: prop_info[:default]
-          end
-        end
-        # Add to_h method to serialize the struct to a hash
-        define_method :to_h do
-          hash = {}
-          # Add all properties
-          self.class.props.keys.each do |key|
-            hash[key] = self.send(key)
-          end
-          hash
-        end
-      end
+      build_enhanced_struct({
+        input: input_props,
+        output: output_props
+      })
     end
   end
 end

data/lib/dspy/prompt.rb ADDED

@@ -0,0 +1,222 @@
+# frozen_string_literal: true
+require 'sorbet-runtime'
+require_relative 'few_shot_example'
+module DSPy
+  class Prompt
+    extend T::Sig
+    sig { returns(String) }
+    attr_reader :instruction
+    sig { returns(T::Array[FewShotExample]) }
+    attr_reader :few_shot_examples
+    sig { returns(T::Hash[Symbol, T.untyped]) }
+    attr_reader :input_schema
+    sig { returns(T::Hash[Symbol, T.untyped]) }
+    attr_reader :output_schema
+    sig { returns(T.nilable(String)) }
+    attr_reader :signature_class_name
+    sig do
+      params(
+        instruction: String,
+        input_schema: T::Hash[Symbol, T.untyped],
+        output_schema: T::Hash[Symbol, T.untyped],
+        few_shot_examples: T::Array[FewShotExample],
+        signature_class_name: T.nilable(String)
+      ).void
+    end
+    def initialize(instruction:, input_schema:, output_schema:, few_shot_examples: [], signature_class_name: nil)
+      @instruction = instruction
+      @few_shot_examples = few_shot_examples.freeze
+      @input_schema = input_schema.freeze
+      @output_schema = output_schema.freeze
+      @signature_class_name = signature_class_name
+    end
+    # Immutable update methods for optimization
+    sig { params(new_instruction: String).returns(Prompt) }
+    def with_instruction(new_instruction)
+      self.class.new(
+        instruction: new_instruction,
+        input_schema: @input_schema,
+        output_schema: @output_schema,
+        few_shot_examples: @few_shot_examples,
+        signature_class_name: @signature_class_name
+      )
+    end
+    sig { params(new_examples: T::Array[FewShotExample]).returns(Prompt) }
+    def with_examples(new_examples)
+      self.class.new(
+        instruction: @instruction,
+        input_schema: @input_schema,
+        output_schema: @output_schema,
+        few_shot_examples: new_examples,
+        signature_class_name: @signature_class_name
+      )
+    end
+    sig { params(new_examples: T::Array[FewShotExample]).returns(Prompt) }
+    def add_examples(new_examples)
+      combined_examples = @few_shot_examples + new_examples
+      with_examples(combined_examples)
+    end
+    # Core prompt rendering methods
+    sig { returns(String) }
+    def render_system_prompt
+      sections = []
+      sections << "Your input schema fields are:"
+      sections << "```json"
+      sections << JSON.pretty_generate(@input_schema)
+      sections << "```"
+      sections << "Your output schema fields are:"
+      sections << "```json"
+      sections << JSON.pretty_generate(@output_schema)
+      sections << "```"
+      sections << ""
+      sections << "All interactions will be structured in the following way, with the appropriate values filled in."
+      # Add few-shot examples if present
+      if @few_shot_examples.any?
+        sections << ""
+        sections << "Here are some examples:"
+        sections << ""
+        @few_shot_examples.each_with_index do |example, index|
+          sections << "### Example #{index + 1}"
+          sections << example.to_prompt_section
+          sections << ""
+        end
+      end
+      sections << "## Input values"
+      sections << "```json"
+      sections << "{input_values}"
+      sections << "```"
+      sections << "## Output values"
+      sections << "Respond exclusively with the output schema fields in the json block below."
+      sections << "```json"
+      sections << "{output_values}"
+      sections << "```"
+      sections << ""
+      sections << "In adhering to this structure, your objective is: #{@instruction}"
+      sections.join("\n")
+    end
+    sig { params(input_values: T::Hash[Symbol, T.untyped]).returns(String) }
+    def render_user_prompt(input_values)
+      sections = []
+      sections << "## Input Values"
+      sections << "```json"
+      sections << JSON.pretty_generate(input_values)
+      sections << "```"
+      sections << ""
+      sections << "Respond with the corresponding output schema fields wrapped in a ```json ``` block,"
+      sections << "starting with the heading `## Output values`."
+      sections.join("\n")
+    end
+    # Generate messages for LM adapter
+    sig { params(input_values: T::Hash[Symbol, T.untyped]).returns(T::Array[T::Hash[Symbol, String]]) }
+    def to_messages(input_values)
+      [
+        { role: 'system', content: render_system_prompt },
+        { role: 'user', content: render_user_prompt(input_values) }
+      ]
+    end
+    # Serialization for persistence and optimization
+    sig { returns(T::Hash[Symbol, T.untyped]) }
+    def to_h
+      {
+        instruction: @instruction,
+        few_shot_examples: @few_shot_examples.map(&:to_h),
+        input_schema: @input_schema,
+        output_schema: @output_schema,
+        signature_class_name: @signature_class_name
+      }
+    end
+    sig { params(hash: T::Hash[Symbol, T.untyped]).returns(Prompt) }
+    def self.from_h(hash)
+      examples = (hash[:few_shot_examples] || []).map { |ex| FewShotExample.from_h(ex) }
+      new(
+        instruction: hash[:instruction] || "",
+        input_schema: hash[:input_schema] || {},
+        output_schema: hash[:output_schema] || {},
+        few_shot_examples: examples,
+        signature_class_name: hash[:signature_class_name]
+      )
+    end
+    # Create prompt from signature class
+    sig { params(signature_class: T.class_of(Signature)).returns(Prompt) }
+    def self.from_signature(signature_class)
+      new(
+        instruction: signature_class.description || "Complete this task.",
+        input_schema: signature_class.input_json_schema,
+        output_schema: signature_class.output_json_schema,
+        few_shot_examples: [],
+        signature_class_name: signature_class.name
+      )
+    end
+    # Comparison and diff methods for optimization
+    sig { params(other: T.untyped).returns(T::Boolean) }
+    def ==(other)
+      return false unless other.is_a?(Prompt)
+      @instruction == other.instruction &&
+        @few_shot_examples == other.few_shot_examples &&
+        @input_schema == other.input_schema &&
+        @output_schema == other.output_schema
+    end
+    sig { params(other: Prompt).returns(T::Hash[Symbol, T.untyped]) }
+    def diff(other)
+      changes = {}
+      changes[:instruction] = {
+        from: @instruction,
+        to: other.instruction
+      } if @instruction != other.instruction
+      changes[:few_shot_examples] = {
+        from: @few_shot_examples.length,
+        to: other.few_shot_examples.length,
+        added: other.few_shot_examples - @few_shot_examples,
+        removed: @few_shot_examples - other.few_shot_examples
+      } if @few_shot_examples != other.few_shot_examples
+      changes
+    end
+    # Statistics for optimization tracking
+    sig { returns(T::Hash[Symbol, T.untyped]) }
+    def stats
+      {
+        character_count: @instruction.length,
+        example_count: @few_shot_examples.length,
+        total_example_chars: @few_shot_examples.sum { |ex| ex.to_prompt_section.length },
+        input_fields: @input_schema.dig(:properties)&.keys&.length || 0,
+        output_fields: @output_schema.dig(:properties)&.keys&.length || 0
+      }
+    end
+  end
+end