RubyGems - dspy - Versions diffs - 0.33.0 → 0.34.0 - Mend

dspy 0.33.0 → 0.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

checksums.yaml +4 -4
data/README.md +15 -4
data/lib/dspy/evals.rb +45 -2
data/lib/dspy/scores/data_type.rb +30 -0
data/lib/dspy/scores/evaluators.rb +279 -0
data/lib/dspy/scores/score_event.rb +56 -0
data/lib/dspy/scores.rb +135 -0
data/lib/dspy/version.rb +1 -1
data/lib/dspy.rb +1 -0
metadata +5 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 52dc686ff0347f7844a3b6fc476b31737f3467d5d179974f34a98b8dbbd12073
-  data.tar.gz: 0e39c94a4766c481167268f49e42277d297b688ee9f960181785062e69f91572
+  metadata.gz: 3bf98e1e8f5f939799d7e14717d8859b10830144a9c23f1d4818e6fa021fb46a
+  data.tar.gz: 154e27f97ed2c3ae5b8a04f2d3941a93e79a0e88bae8459fd73a85a9d03ed186
 SHA512:
-  metadata.gz: bb4fb2ce89ed600e971a07cfabe3eb9edd344563aa77df57304dbec565121eeb9c8a53ba4cdd66f04c81cb3b1231d222a59fb4a15962680051c07de49c080dca
-  data.tar.gz: 2543dd3bc228c98a1ab82c14ce8fffbed86342fa661af674976b90b10752334a5606257401f9ff953bd82f36aa29fe816895acec9e30a5219e8c8dc2d3ea1727
+  metadata.gz: 298305a05b5a38806d67989c01ed2a476f291d13ec6c8a228c8d465fa925d56b178b6c0865d4e9282b4e6ab29aa9655e9b7fc64a228478524d2ce94d3015758f
+  data.tar.gz: 207ff7188ff0bcb16bfd6b45893672085363683464889a4d3e3ba666d2989f2daa0260660da00f7e29ba0df360ca85a430536f38fae319a2e2106405e99e0af9

data/README.md CHANGED Viewed

@@ -3,7 +3,7 @@
 [![Gem Version](https://img.shields.io/gem/v/dspy)](https://rubygems.org/gems/dspy)
 [![Total Downloads](https://img.shields.io/gem/dt/dspy)](https://rubygems.org/gems/dspy)
 [![Build Status](https://img.shields.io/github/actions/workflow/status/vicentereig/dspy.rb/ruby.yml?branch=main&label=build)](https://github.com/vicentereig/dspy.rb/actions/workflows/ruby.yml)
-[![Documentation](https://img.shields.io/badge/docs-vicentereig.github.io%2Fdspy.rb-blue)](https://vicentereig.github.io/dspy.rb/)
+[![Documentation](https://img.shields.io/badge/docs-oss.vicente.services%2Fdspy.rb-blue)](https://oss.vicente.services/dspy.rb/)
 [![Discord](https://img.shields.io/discord/1161519468141355160?label=discord&logo=discord&logoColor=white)](https://discord.gg/zWBhrMqn)
 > [!NOTE]
@@ -248,13 +248,24 @@ DSPy.rb has gone from experimental to production-ready in three fast releases.
 ## Documentation
-📖 **[Complete Documentation Website](https://vicentereig.github.io/dspy.rb/)**
+📖 **[Complete Documentation Website](https://oss.vicente.services/dspy.rb/)**
 ### LLM-Friendly Documentation
 For LLMs and AI assistants working with DSPy.rb:
-- **[llms.txt](https://vicentereig.github.io/dspy.rb/llms.txt)** - Concise reference optimized for LLMs
-- **[llms-full.txt](https://vicentereig.github.io/dspy.rb/llms-full.txt)** - Comprehensive API documentation
+- **[llms.txt](https://oss.vicente.services/dspy.rb/llms.txt)** - Concise reference optimized for LLMs
+- **[llms-full.txt](https://oss.vicente.services/dspy.rb/llms-full.txt)** - Comprehensive API documentation
+### Claude Skill
+A [Claude Skill](https://github.com/vicentereig/dspy-rb-skill) is available to help you build DSPy.rb applications with Claude Code or claude.ai.
+**Claude Code:**
+```bash
+git clone https://github.com/vicentereig/dspy-rb-skill ~/.claude/skills/dspy-rb
+```
+**Claude.ai (Pro/Max):** Download the [skill as a ZIP](https://github.com/vicentereig/dspy-rb-skill/archive/refs/heads/main.zip) and upload via Settings > Skills.
 ### Getting Started
 - **[Installation & Setup](docs/src/getting-started/installation.md)** - Detailed installation and configuration

data/lib/dspy/evals.rb CHANGED Viewed

@@ -191,6 +191,12 @@ module DSPy
     sig { returns(T.nilable(BatchEvaluationResult)) }
     attr_reader :last_batch_result
+    sig { returns(T::Boolean) }
+    attr_reader :export_scores
+    sig { returns(String) }
+    attr_reader :score_name
     include DSPy::Callbacks
     create_before_callback :call, wrap: false
@@ -227,16 +233,20 @@ module DSPy
         num_threads: T.nilable(Integer),
         max_errors: T.nilable(Integer),
         failure_score: T.nilable(Numeric),
-        provide_traceback: T::Boolean
+        provide_traceback: T::Boolean,
+        export_scores: T::Boolean,
+        score_name: String
       ).void
     end
-    def initialize(program, metric: nil, num_threads: 1, max_errors: 5, failure_score: 0.0, provide_traceback: true)
+    def initialize(program, metric: nil, num_threads: 1, max_errors: 5, failure_score: 0.0, provide_traceback: true, export_scores: false, score_name: 'evaluation')
       @program = program
       @metric = metric
       @num_threads = num_threads || 1
       @max_errors = max_errors || 5
       @provide_traceback = provide_traceback
       @failure_score = failure_score ? failure_score.to_f : 0.0
+      @export_scores = export_scores
+      @score_name = score_name
       @last_example_result = nil
       @last_batch_result = nil
     end
@@ -665,6 +675,11 @@ module DSPy
         score: result.metrics[:score],
         error: result.metrics[:error]
       })
+      # Export score to Langfuse if enabled
+      if @export_scores
+        export_example_score(example, result)
+      end
     rescue => e
       DSPy.log('evals.example.observation_error', error: e.message)
     end
@@ -678,10 +693,38 @@ module DSPy
         pass_rate: batch_result.pass_rate,
         score: batch_result.score
       })
+      # Export batch score to Langfuse if enabled
+      if @export_scores
+        export_batch_score(batch_result)
+      end
     rescue => e
       DSPy.log('evals.batch.observation_error', error: e.message)
     end
+    def export_example_score(example, result)
+      score_value = result.metrics[:score] || (result.passed ? 1.0 : 0.0)
+      example_id = extract_example_id(example)
+      DSPy.score(
+        @score_name,
+        score_value,
+        comment: "Example: #{example_id || 'unknown'}, passed: #{result.passed}"
+      )
+    rescue => e
+      DSPy.log('evals.score_export_error', error: e.message)
+    end
+    def export_batch_score(batch_result)
+      DSPy.score(
+        "#{@score_name}_batch",
+        batch_result.pass_rate,
+        comment: "Batch: #{batch_result.passed_examples}/#{batch_result.total_examples} passed"
+      )
+    rescue => e
+      DSPy.log('evals.batch_score_export_error', error: e.message)
+    end
     def extract_example_id(example)
       if example.respond_to?(:id)
         example.id

data/lib/dspy/scores/data_type.rb ADDED Viewed

@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+require 'sorbet-runtime'
+module DSPy
+  module Scores
+    # Langfuse score data types
+    # Maps to: NUMERIC, BOOLEAN, CATEGORICAL
+    class DataType < T::Enum
+      extend T::Sig
+      enums do
+        Numeric = new('NUMERIC')
+        Boolean = new('BOOLEAN')
+        Categorical = new('CATEGORICAL')
+      end
+      sig { params(value: String).returns(DataType) }
+      def self.deserialize(value)
+        case value
+        when 'NUMERIC' then Numeric
+        when 'BOOLEAN' then Boolean
+        when 'CATEGORICAL' then Categorical
+        else
+          raise ArgumentError, "Unknown DataType: #{value}"
+        end
+      end
+    end
+  end
+end

data/lib/dspy/scores/evaluators.rb ADDED Viewed

@@ -0,0 +1,279 @@
+# frozen_string_literal: true
+require 'sorbet-runtime'
+require 'json'
+module DSPy
+  module Scores
+    # Built-in evaluators for common evaluation patterns
+    # Each evaluator returns a ScoreEvent that can be exported to Langfuse
+    module Evaluators
+      extend T::Sig
+      # Exact string match evaluator
+      # Returns a ScoreEvent whose value is 1.0 if output exactly matches expected
+      # (compared case-insensitively when ignore_case is true), 0.0 otherwise
+      sig do
+        params(
+          output: String,
+          expected: String,
+          name: String,
+          ignore_case: T::Boolean,
+          comment: T.nilable(String),
+          trace_id: T.nilable(String),
+          observation_id: T.nilable(String),
+          emit: T::Boolean
+        ).returns(ScoreEvent)
+      end
+      def self.exact_match(
+        output:,
+        expected:,
+        name: 'exact_match',
+        ignore_case: false,
+        comment: nil,
+        trace_id: nil,
+        observation_id: nil,
+        emit: true
+      )
+        match = if ignore_case
+                  output.downcase == expected.downcase
+                else
+                  output == expected
+                end
+        DSPy::Scores.create(
+          name: name,
+          value: match ? 1.0 : 0.0,
+          data_type: DataType::Numeric,
+          comment: comment || (match ? 'Exact match' : 'No match'),
+          trace_id: trace_id,
+          observation_id: observation_id,
+          emit: emit
+        )
+      end
+      # Substring containment evaluator
+      # Returns a ScoreEvent whose value is 1.0 if output contains expected
+      # (compared case-insensitively when ignore_case is true), 0.0 otherwise
+      sig do
+        params(
+          output: String,
+          expected: String,
+          name: String,
+          ignore_case: T::Boolean,
+          comment: T.nilable(String),
+          trace_id: T.nilable(String),
+          observation_id: T.nilable(String),
+          emit: T::Boolean
+        ).returns(ScoreEvent)
+      end
+      def self.contains(
+        output:,
+        expected:,
+        name: 'contains',
+        ignore_case: false,
+        comment: nil,
+        trace_id: nil,
+        observation_id: nil,
+        emit: true
+      )
+        match = if ignore_case
+                  output.downcase.include?(expected.downcase)
+                else
+                  output.include?(expected)
+                end
+        DSPy::Scores.create(
+          name: name,
+          value: match ? 1.0 : 0.0,
+          data_type: DataType::Numeric,
+          comment: comment || (match ? 'Contains expected' : 'Does not contain expected'),
+          trace_id: trace_id,
+          observation_id: observation_id,
+          emit: emit
+        )
+      end
+      # Regular expression match evaluator
+      # Returns a ScoreEvent whose value is 1.0 if output matches pattern, 0.0 otherwise
+      # (a String pattern is compiled with Regexp.new before matching)
+      sig do
+        params(
+          output: String,
+          pattern: T.any(Regexp, String),
+          name: String,
+          comment: T.nilable(String),
+          trace_id: T.nilable(String),
+          observation_id: T.nilable(String),
+          emit: T::Boolean
+        ).returns(ScoreEvent)
+      end
+      def self.regex_match(
+        output:,
+        pattern:,
+        name: 'regex_match',
+        comment: nil,
+        trace_id: nil,
+        observation_id: nil,
+        emit: true
+      )
+        regex = pattern.is_a?(Regexp) ? pattern : Regexp.new(pattern)
+        match = regex.match?(output)
+        DSPy::Scores.create(
+          name: name,
+          value: match ? 1.0 : 0.0,
+          data_type: DataType::Numeric,
+          comment: comment || (match ? 'Regex matched' : 'Regex did not match'),
+          trace_id: trace_id,
+          observation_id: observation_id,
+          emit: emit
+        )
+      end
+      # Length check evaluator
+      # Returns a ScoreEvent whose value is 1.0 if min_length <= output.length <= max_length
+      # (a nil bound is not checked), 0.0 otherwise
+      sig do
+        params(
+          output: String,
+          min_length: T.nilable(Integer),
+          max_length: T.nilable(Integer),
+          name: String,
+          comment: T.nilable(String),
+          trace_id: T.nilable(String),
+          observation_id: T.nilable(String),
+          emit: T::Boolean
+        ).returns(ScoreEvent)
+      end
+      def self.length_check(
+        output:,
+        min_length: nil,
+        max_length: nil,
+        name: 'length_check',
+        comment: nil,
+        trace_id: nil,
+        observation_id: nil,
+        emit: true
+      )
+        length = output.length
+        valid = true
+        valid = false if min_length && length < min_length
+        valid = false if max_length && length > max_length
+        DSPy::Scores.create(
+          name: name,
+          value: valid ? 1.0 : 0.0,
+          data_type: DataType::Numeric,
+          comment: comment || "Length: #{length} (min: #{min_length || 'none'}, max: #{max_length || 'none'})",
+          trace_id: trace_id,
+          observation_id: observation_id,
+          emit: emit
+        )
+      end
+      # Levenshtein similarity evaluator
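+      # Returns a ScoreEvent whose value is 1 - distance / max(output.length, expected.length),
+      # rounded to 4 decimals; the value is 1.0 when both strings are empty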
+      sig do
+        params(
+          output: String,
+          expected: String,
+          name: String,
+          comment: T.nilable(String),
+          trace_id: T.nilable(String),
+          observation_id: T.nilable(String),
+          emit: T::Boolean
+        ).returns(ScoreEvent)
+      end
+      def self.similarity(
+        output:,
+        expected:,
+        name: 'similarity',
+        comment: nil,
+        trace_id: nil,
+        observation_id: nil,
+        emit: true
+      )
+        distance = levenshtein_distance(output, expected)
+        max_length = [output.length, expected.length].max
+        score = max_length.zero? ? 1.0 : 1.0 - (distance.to_f / max_length)
+        DSPy::Scores.create(
+          name: name,
+          value: score.round(4),
+          data_type: DataType::Numeric,
+          comment: comment || "Levenshtein distance: #{distance}",
+          trace_id: trace_id,
+          observation_id: observation_id,
+          emit: emit
+        )
+      end
+      # JSON validity evaluator
+      # Returns a ScoreEvent whose value is 1.0 if JSON.parse accepts output, 0.0 otherwise
+      sig do
+        params(
+          output: String,
+          name: String,
+          comment: T.nilable(String),
+          trace_id: T.nilable(String),
+          observation_id: T.nilable(String),
+          emit: T::Boolean
+        ).returns(ScoreEvent)
+      end
+      def self.json_valid(
+        output:,
+        name: 'json_valid',
+        comment: nil,
+        trace_id: nil,
+        observation_id: nil,
+        emit: true
+      )
+        valid = begin
+          JSON.parse(output)
+          true
+        rescue JSON::ParserError
+          false
+        end
+        DSPy::Scores.create(
+          name: name,
+          value: valid ? 1.0 : 0.0,
+          data_type: DataType::Numeric,
+          comment: comment || (valid ? 'Valid JSON' : 'Invalid JSON'),
+          trace_id: trace_id,
+          observation_id: observation_id,
+          emit: emit
+        )
+      end
+      # Levenshtein distance implementation
+      sig { params(str1: String, str2: String).returns(Integer) }
+      def self.levenshtein_distance(str1, str2)
+        m = str1.length
+        n = str2.length
+        return n if m.zero?
+        return m if n.zero?
+        # Create distance matrix
+        d = Array.new(m + 1) { Array.new(n + 1, 0) }
+        # Initialize first column
+        (0..m).each { |i| d[i][0] = i }
+        # Initialize first row
+        (0..n).each { |j| d[0][j] = j }
+        # Fill in the rest of the matrix
+        (1..m).each do |i|
+          (1..n).each do |j|
+            cost = str1[i - 1] == str2[j - 1] ? 0 : 1
+            d[i][j] = [
+              d[i - 1][j] + 1,     # deletion
+              d[i][j - 1] + 1,     # insertion
+              d[i - 1][j - 1] + cost # substitution
+            ].min
+          end
+        end
+        d[m][n]
+      end
+    end
+  end
+end

data/lib/dspy/scores/score_event.rb ADDED Viewed

@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+require 'sorbet-runtime'
+require 'securerandom'
+require_relative 'data_type'
+module DSPy
+  module Scores
+    # Represents a score to be sent to Langfuse
+    # T::Struct holding all score attributes (fields are declared with `prop`,
+    # so setters exist; treat instances as read-only)
+    class ScoreEvent < T::Struct
+      extend T::Sig
+      # Unique identifier for the score (idempotency key)
+      prop :id, String, factory: -> { SecureRandom.uuid }
+      # Score name/identifier (required)
+      prop :name, String
+      # Score value - numeric, boolean (0/1), or categorical (string)
+      prop :value, T.any(Numeric, String)
+      # Data type for the score
+      prop :data_type, DataType, default: DataType::Numeric
+      # Optional human-readable comment
+      prop :comment, T.nilable(String), default: nil
+      # Trace ID to link the score to (nilable here, although Langfuse requires one)
+      prop :trace_id, T.nilable(String), default: nil
+      # Observation/span ID to link the score to (optional)
+      prop :observation_id, T.nilable(String), default: nil
+      # Timestamp when the score was created
+      prop :timestamp, Time, factory: -> { Time.now }
+      # Serialize to Langfuse API payload format
+      sig { returns(T::Hash[Symbol, T.untyped]) }
+      def to_langfuse_payload
+        payload = {
+          id: id,
+          name: name,
+          value: value,
+          dataType: data_type.serialize
+        }
+        payload[:comment] = comment if comment
+        payload[:traceId] = trace_id if trace_id
+        payload[:observationId] = observation_id if observation_id
+        payload
+      end
+    end
+  end
+end

data/lib/dspy/scores.rb ADDED Viewed

@@ -0,0 +1,135 @@
+# frozen_string_literal: true
+require_relative 'scores/data_type'
+require_relative 'scores/score_event'
+require_relative 'scores/evaluators'
+module DSPy
+  # Score reporting for Langfuse integration
+  # Provides a simple API for creating and exporting evaluation scores
+  module Scores
+    extend T::Sig
+    class << self
+      extend T::Sig
+      # Create a score event from the current context
+      #
+      # @param name [String] Score identifier (e.g., "accuracy", "relevance")
+      # @param value [Numeric, String] Score value
+      # @param data_type [DataType] Type of score (default: Numeric)
+      # @param comment [String, nil] Optional human-readable comment
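+      # @param span [Object, nil] Optional span to attach score to (its span_id becomes the observation ID)
+      # @param trace_id [String, nil] Trace to link to (default: trace_id from DSPy::Context.current)
+      # @param observation_id [String, nil] Observation to link to (default: derived from span)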
+      # @param emit [Boolean] Whether to emit score.create event (default: true)
+      # @return [ScoreEvent] The created score event
+      sig do
+        params(
+          name: String,
+          value: T.any(Numeric, String),
+          data_type: DataType,
+          comment: T.nilable(String),
+          span: T.untyped,
+          trace_id: T.nilable(String),
+          observation_id: T.nilable(String),
+          emit: T::Boolean
+        ).returns(ScoreEvent)
+      end
+      def create(
+        name:,
+        value:,
+        data_type: DataType::Numeric,
+        comment: nil,
+        span: nil,
+        trace_id: nil,
+        observation_id: nil,
+        emit: true
+      )
+        # Fall back to the context's trace_id and the span's ID when not passed explicitly
+        resolved_trace_id = trace_id || extract_trace_id_from_context
+        resolved_observation_id = observation_id || extract_observation_id_from_span(span)
+        event = ScoreEvent.new(
+          name: name,
+          value: value,
+          data_type: data_type,
+          comment: comment,
+          trace_id: resolved_trace_id,
+          observation_id: resolved_observation_id
+        )
+        # Emit score.create event for listeners and exporters
+        emit_score_event(event) if emit
+        event
+      end
+      private
+      sig { returns(T.nilable(String)) }
+      def extract_trace_id_from_context
+        return nil unless defined?(DSPy::Context)
+        DSPy::Context.current[:trace_id]
+      rescue StandardError
+        nil
+      end
+      sig { params(span: T.untyped).returns(T.nilable(String)) }
+      def extract_observation_id_from_span(span)
+        return nil unless span
+        if span.respond_to?(:context) && span.context.respond_to?(:span_id)
+          span.context.span_id
+        elsif span.respond_to?(:span_id)
+          span.span_id
+        end
+      rescue StandardError
+        nil
+      end
+      sig { params(event: ScoreEvent).void }
+      def emit_score_event(event)
+        return unless defined?(DSPy) && DSPy.respond_to?(:events)
+        DSPy.events.notify('score.create', {
+          score_id: event.id,
+          score_name: event.name,
+          score_value: event.value,
+          score_data_type: event.data_type.serialize,
+          score_comment: event.comment,
+          trace_id: event.trace_id,
+          observation_id: event.observation_id,
+          timestamp: event.timestamp.iso8601
+        })
+      rescue StandardError => e
+        DSPy.log('score.emit_error', error: e.message) if DSPy.respond_to?(:log)
+      end
+    end
+  end
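+  # Top-level convenience method for creating scores; delegates to Scores.create
+  # (no emit option, so the score.create event is always emitted) and returns the ScoreEvent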
+  #
+  # @example Basic usage
+  #   DSPy.score('accuracy', 0.95)
+  #
+  # @example With comment
+  #   DSPy.score('accuracy', 0.95, comment: 'Exact match')
+  #
+  # @example Boolean score
+  #   DSPy.score('is_valid', 1, data_type: DSPy::Scores::DataType::Boolean)
+  #
+  # @example Categorical score
+  #   DSPy.score('sentiment', 'positive', data_type: DSPy::Scores::DataType::Categorical)
+  #
+  def self.score(name, value, data_type: Scores::DataType::Numeric, comment: nil, span: nil, trace_id: nil, observation_id: nil)
+    Scores.create(
+      name: name,
+      value: value,
+      data_type: data_type,
+      comment: comment,
+      span: span,
+      trace_id: trace_id,
+      observation_id: observation_id
+    )
+  end
+end

data/lib/dspy/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module DSPy
-  VERSION = "0.33.0"
+  VERSION = "0.34.0"
 end

data/lib/dspy.rb CHANGED Viewed

@@ -223,6 +223,7 @@ require_relative 'dspy/events/subscriber_mixin'
 require_relative 'dspy/chain_of_thought'
 require_relative 'dspy/re_act'
 require_relative 'dspy/evals'
+require_relative 'dspy/scores'
 require_relative 'dspy/teleprompt/teleprompter'
 require_relative 'dspy/teleprompt/utils'
 require_relative 'dspy/teleprompt/data_handler'

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: dspy
 version: !ruby/object:Gem::Version
-  version: 0.33.0
+  version: 0.34.0
 platform: ruby
 authors:
 - Vicente Reig Rincón de Arellano
@@ -219,6 +219,10 @@ files:
 - lib/dspy/schema/sorbet_toon_adapter.rb
 - lib/dspy/schema/version.rb
 - lib/dspy/schema_adapters.rb
+- lib/dspy/scores.rb
+- lib/dspy/scores/data_type.rb
+- lib/dspy/scores/evaluators.rb
+- lib/dspy/scores/score_event.rb
 - lib/dspy/signature.rb
 - lib/dspy/storage/program_storage.rb
 - lib/dspy/storage/storage_manager.rb