RubyGems - dspy - Versions diffs - 0.11.0 → 0.13.0 - Mend

dspy 0.11.0 → 0.13.0

This diff shows the contents of publicly available package versions as they were released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (11)

checksums.yaml +4 -4
data/README.md +3 -0
data/lib/dspy/instrumentation/token_tracker.rb +18 -2
data/lib/dspy/lm/adapters/anthropic_adapter.rb +4 -1
data/lib/dspy/lm/adapters/openai_adapter.rb +4 -1
data/lib/dspy/lm/message_builder.rb +28 -0
data/lib/dspy/lm/response.rb +2 -0
data/lib/dspy/lm/usage.rb +157 -0
data/lib/dspy/lm.rb +101 -36
data/lib/dspy/version.rb +1 -1
metadata +4 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: de6126089636bd0d5fdaf9cd2bba791157186683a7984174bac980be8e32a483
-  data.tar.gz: 1e78c20f7e1a37cf025158be0f4014ee2abe9ba95b84682361ff9ea544fecd2c
+  metadata.gz: d80d5b0166fe5a101e4918ffee13a70dec6ca67b493cf2e68dff1c18b2df36c1
+  data.tar.gz: 1687fe88d41c5d4627592ff5e98f87ca9f40186870386291b1ead091f51235da
 SHA512:
-  metadata.gz: 7b3dbde7e5040dc1b0562142bdc560f9fc393bbb7cf20bef4d767541be079408dc1c6e196257f07193266713585bdfb21ac77bd08df3c6ab69607354393987c2
-  data.tar.gz: 412cf071635f85bb3fb08f339a1dce23d4a9f00144104fc03fc25cc706d87c86c707221bc8a98f6847a7e157e2fa86de8bee4ee7c44d2bd1b9b0ab41934dd411
+  metadata.gz: 7bedebf2e58243bedcf8003d25b4f55789a7e4a611f9f1997a322e93db553fe117fde155415c5a607a65b49f61f2a640c7899a0fd4793bcdf9e8672602f54755
+  data.tar.gz: cdf39e605a7550e94334c5bad13ccc5af2a255b7039559b56b918e1ba3706e671c52960558459a0c96edab318c68bc9828b1eb03530b47daa896766a2cb7aa7d

data/README.md CHANGED Viewed

@@ -1,5 +1,8 @@
 # DSPy.rb
+[![Gem Version](https://img.shields.io/gem/v/dspy)](https://rubygems.org/gems/dspy)
+[![Total Downloads](https://img.shields.io/gem/dt/dspy)](https://rubygems.org/gems/dspy)
 **Build reliable LLM applications in Ruby using composable, type-safe modules.**
 DSPy.rb brings structured LLM programming to Ruby developers. Instead of wrestling with prompt strings and parsing responses, you define typed signatures and compose them into pipelines that just work.

data/lib/dspy/instrumentation/token_tracker.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 # frozen_string_literal: true
+require_relative '../lm/usage'
 module DSPy
   module Instrumentation
     # Utility for extracting token usage from different LM adapters
@@ -9,6 +11,18 @@ module DSPy
       # Extract actual token usage from API responses
       def extract_token_usage(response, provider)
+        return {} unless response&.usage
+        # Handle Usage struct
+        if response.usage.is_a?(DSPy::LM::Usage) || response.usage.is_a?(DSPy::LM::OpenAIUsage)
+          return {
+            input_tokens: response.usage.input_tokens,
+            output_tokens: response.usage.output_tokens,
+            total_tokens: response.usage.total_tokens
+          }
+        end
+        # Fallback to legacy hash handling
         case provider.to_s.downcase
         when 'openai'
           extract_openai_tokens(response)
@@ -27,11 +41,12 @@ module DSPy
         usage = response.usage
         return {} unless usage.is_a?(Hash)
+        # Handle both symbol and string keys for VCR compatibility
         {
           input_tokens: usage[:prompt_tokens] || usage['prompt_tokens'],
           output_tokens: usage[:completion_tokens] || usage['completion_tokens'],
           total_tokens: usage[:total_tokens] || usage['total_tokens']
-        }
+        }.compact # Remove nil values
       end
       def extract_anthropic_tokens(response)
@@ -40,6 +55,7 @@ module DSPy
         usage = response.usage
         return {} unless usage.is_a?(Hash)
+        # Handle both symbol and string keys for VCR compatibility
         input_tokens = usage[:input_tokens] || usage['input_tokens'] || 0
         output_tokens = usage[:output_tokens] || usage['output_tokens'] || 0
@@ -47,7 +63,7 @@ module DSPy
           input_tokens: input_tokens,
           output_tokens: output_tokens,
           total_tokens: input_tokens + output_tokens
-        }
+        }.compact # Remove nil values
       end
     end
   end

data/lib/dspy/lm/adapters/anthropic_adapter.rb CHANGED Viewed

@@ -63,9 +63,12 @@ module DSPy
             content = response.content.first.text if response.content.is_a?(Array) && response.content.first
             usage = response.usage
+            # Convert usage data to typed struct
+            usage_struct = UsageFactory.create('anthropic', usage)
             Response.new(
               content: content,
-              usage: usage.respond_to?(:to_h) ? usage.to_h : usage,
+              usage: usage_struct,
               metadata: {
                 provider: 'anthropic',
                 model: model,

data/lib/dspy/lm/adapters/openai_adapter.rb CHANGED Viewed

@@ -52,9 +52,12 @@ module DSPy
             raise AdapterError, "OpenAI refused to generate output: #{message.refusal}"
           end
+          # Convert usage data to typed struct
+          usage_struct = UsageFactory.create('openai', usage)
           Response.new(
             content: content,
-            usage: usage.respond_to?(:to_h) ? usage.to_h : usage,
+            usage: usage_struct,
             metadata: {
               provider: 'openai',
               model: model,

data/lib/dspy/lm/message_builder.rb ADDED Viewed

@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+module DSPy
+  class LM
+    class MessageBuilder
+      attr_reader :messages
+      def initialize
+        @messages = []
+      end
+      def system(content)
+        @messages << { role: 'system', content: content.to_s }
+        self
+      end
+      def user(content)
+        @messages << { role: 'user', content: content.to_s }
+        self
+      end
+      def assistant(content)
+        @messages << { role: 'assistant', content: content.to_s }
+        self
+      end
+    end
+  end
+end

data/lib/dspy/lm/response.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 # frozen_string_literal: true
+require_relative 'usage'
 module DSPy
   class LM
     # Normalized response format for all LM providers

data/lib/dspy/lm/usage.rb ADDED Viewed

@@ -0,0 +1,157 @@
+# frozen_string_literal: true
+require 'sorbet-runtime'
+module DSPy
+  class LM
+    # Base class for token usage information
+    class Usage < T::Struct
+      extend T::Sig
+      const :input_tokens, Integer
+      const :output_tokens, Integer
+      const :total_tokens, Integer
+      sig { returns(Hash) }
+      def to_h
+        {
+          input_tokens: input_tokens,
+          output_tokens: output_tokens,
+          total_tokens: total_tokens
+        }
+      end
+    end
+    # OpenAI-specific usage information with additional fields
+    class OpenAIUsage < T::Struct
+      extend T::Sig
+      const :input_tokens, Integer
+      const :output_tokens, Integer
+      const :total_tokens, Integer
+      const :prompt_tokens_details, T.nilable(T::Hash[Symbol, Integer]), default: nil
+      const :completion_tokens_details, T.nilable(T::Hash[Symbol, Integer]), default: nil
+      sig { returns(Hash) }
+      def to_h
+        base = {
+          input_tokens: input_tokens,
+          output_tokens: output_tokens,
+          total_tokens: total_tokens
+        }
+        base[:prompt_tokens_details] = prompt_tokens_details if prompt_tokens_details
+        base[:completion_tokens_details] = completion_tokens_details if completion_tokens_details
+        base
+      end
+    end
+    # Factory for creating appropriate usage objects
+    module UsageFactory
+      extend T::Sig
+      sig { params(provider: String, usage_data: T.untyped).returns(T.nilable(T.any(Usage, OpenAIUsage))) }
+      def self.create(provider, usage_data)
+        return nil if usage_data.nil?
+        # If already a Usage struct, return as-is
+        return usage_data if usage_data.is_a?(Usage)
+        # Handle test doubles by converting to hash
+        if usage_data.respond_to?(:to_h)
+          usage_data = usage_data.to_h
+        end
+        # Convert hash to appropriate struct
+        return nil unless usage_data.is_a?(Hash)
+        # Normalize keys to symbols
+        normalized = usage_data.transform_keys(&:to_sym)
+        case provider.to_s.downcase
+        when 'openai'
+          create_openai_usage(normalized)
+        when 'anthropic'
+          create_anthropic_usage(normalized)
+        else
+          create_generic_usage(normalized)
+        end
+      end
+      private
+      sig { params(data: T::Hash[Symbol, T.untyped]).returns(T.nilable(OpenAIUsage)) }
+      def self.create_openai_usage(data)
+        # OpenAI uses prompt_tokens/completion_tokens
+        input_tokens = data[:prompt_tokens] || data[:input_tokens] || 0
+        output_tokens = data[:completion_tokens] || data[:output_tokens] || 0
+        total_tokens = data[:total_tokens] || (input_tokens + output_tokens)
+        # Convert prompt_tokens_details and completion_tokens_details to hashes if needed
+        prompt_details = convert_to_hash(data[:prompt_tokens_details])
+        completion_details = convert_to_hash(data[:completion_tokens_details])
+        OpenAIUsage.new(
+          input_tokens: input_tokens,
+          output_tokens: output_tokens,
+          total_tokens: total_tokens,
+          prompt_tokens_details: prompt_details,
+          completion_tokens_details: completion_details
+        )
+      rescue => e
+        DSPy.logger.debug("Failed to create OpenAI usage: #{e.message}")
+        nil
+      end
+      sig { params(value: T.untyped).returns(T.nilable(T::Hash[Symbol, Integer])) }
+      def self.convert_to_hash(value)
+        return nil if value.nil?
+        return value if value.is_a?(Hash) && value.keys.all? { |k| k.is_a?(Symbol) }
+        # Convert object to hash if it responds to to_h
+        if value.respond_to?(:to_h)
+          hash = value.to_h
+          # Ensure symbol keys and integer values
+          hash.transform_keys(&:to_sym).transform_values(&:to_i)
+        else
+          nil
+        end
+      rescue
+        nil
+      end
+      sig { params(data: T::Hash[Symbol, T.untyped]).returns(T.nilable(Usage)) }
+      def self.create_anthropic_usage(data)
+        # Anthropic uses input_tokens/output_tokens
+        input_tokens = data[:input_tokens] || 0
+        output_tokens = data[:output_tokens] || 0
+        total_tokens = data[:total_tokens] || (input_tokens + output_tokens)
+        Usage.new(
+          input_tokens: input_tokens,
+          output_tokens: output_tokens,
+          total_tokens: total_tokens
+        )
+      rescue => e
+        DSPy.logger.debug("Failed to create Anthropic usage: #{e.message}")
+        nil
+      end
+      sig { params(data: T::Hash[Symbol, T.untyped]).returns(T.nilable(Usage)) }
+      def self.create_generic_usage(data)
+        # Generic fallback
+        input_tokens = data[:input_tokens] || data[:prompt_tokens] || 0
+        output_tokens = data[:output_tokens] || data[:completion_tokens] || 0
+        total_tokens = data[:total_tokens] || (input_tokens + output_tokens)
+        Usage.new(
+          input_tokens: input_tokens,
+          output_tokens: output_tokens,
+          total_tokens: total_tokens
+        )
+      rescue => e
+        DSPy.logger.debug("Failed to create generic usage: #{e.message}")
+        nil
+      end
+    end
+  end
+end

data/lib/dspy/lm.rb CHANGED Viewed

@@ -18,6 +18,9 @@ require_relative 'lm/adapters/anthropic_adapter'
 require_relative 'lm/strategy_selector'
 require_relative 'lm/retry_handler'
+# Load message builder
+require_relative 'lm/message_builder'
 module DSPy
   class LM
     attr_reader :model_id, :api_key, :model, :provider, :adapter
@@ -39,41 +42,13 @@ module DSPy
       # Build messages from inference module
       messages = build_messages(inference_module, input_values)
-      # Calculate input size for monitoring
-      input_text = messages.map { |m| m[:content] }.join(' ')
-      input_size = input_text.length
-      # Use smart consolidation: emit LM events only when not in nested context
-      response = nil
-      token_usage = {}
+      # Execute with instrumentation
+      response = instrument_lm_request(messages, signature_class.name) do
+        chat_with_strategy(messages, signature_class, &block)
+      end
+      # Instrument response parsing
       if should_emit_lm_events?
-        # Emit all LM events when not in nested context
-        response = Instrumentation.instrument('dspy.lm.request', {
-          gen_ai_operation_name: 'chat',
-          gen_ai_system: provider,
-          gen_ai_request_model: model,
-          signature_class: signature_class.name,
-          provider: provider,
-          adapter_class: adapter.class.name,
-          input_size: input_size
-        }) do
-          chat_with_strategy(messages, signature_class, &block)
-        end
-        # Extract actual token usage from response (more accurate than estimation)
-        token_usage = Instrumentation::TokenTracker.extract_token_usage(response, provider)
-        # Emit token usage event if available
-        if token_usage.any?
-          Instrumentation.emit('dspy.lm.tokens', token_usage.merge({
-            gen_ai_system: provider,
-            gen_ai_request_model: model,
-            signature_class: signature_class.name
-          }))
-        end
-        # Instrument response parsing
         parsed_result = Instrumentation.instrument('dspy.lm.response.parsed', {
           signature_class: signature_class.name,
           provider: provider,
@@ -82,15 +57,33 @@ module DSPy
           parse_response(response, input_values, signature_class)
         end
       else
-        # Consolidated mode: execute without nested instrumentation
-        response = chat_with_strategy(messages, signature_class, &block)
-        token_usage = Instrumentation::TokenTracker.extract_token_usage(response, provider)
         parsed_result = parse_response(response, input_values, signature_class)
       end
       parsed_result
     end
+    def raw_chat(messages = nil, &block)
+      # Support both array format and builder DSL
+      if block_given? && messages.nil?
+        # DSL mode - block is for building messages
+        builder = MessageBuilder.new
+        yield builder
+        messages = builder.messages
+        streaming_block = nil
+      else
+        # Array mode - block is for streaming
+        messages ||= []
+        streaming_block = block
+      end
+      # Validate messages format
+      validate_messages!(messages)
+      # Execute with instrumentation
+      execute_raw_chat(messages, &streaming_block)
+    end
     private
     def chat_with_strategy(messages, signature_class, &block)
@@ -208,5 +201,77 @@ module DSPy
         raise "Failed to parse LLM response as JSON: #{e.message}. Original content length: #{response.content&.length || 0} chars"
       end
     end
+    # Common instrumentation method for LM requests
+    def instrument_lm_request(messages, signature_class_name, &execution_block)
+      input_text = messages.map { |m| m[:content] }.join(' ')
+      input_size = input_text.length
+      response = nil
+      if should_emit_lm_events?
+        # Emit dspy.lm.request event
+        response = Instrumentation.instrument('dspy.lm.request', {
+          gen_ai_operation_name: 'chat',
+          gen_ai_system: provider,
+          gen_ai_request_model: model,
+          signature_class: signature_class_name,
+          provider: provider,
+          adapter_class: adapter.class.name,
+          input_size: input_size
+        }, &execution_block)
+        # Extract and emit token usage
+        emit_token_usage(response, signature_class_name)
+      else
+        # Consolidated mode: execute without instrumentation
+        response = execution_block.call
+      end
+      response
+    end
+    # Common method to emit token usage events
+    def emit_token_usage(response, signature_class_name)
+      token_usage = Instrumentation::TokenTracker.extract_token_usage(response, provider)
+      if token_usage.any?
+        Instrumentation.emit('dspy.lm.tokens', token_usage.merge({
+          gen_ai_system: provider,
+          gen_ai_request_model: model,
+          signature_class: signature_class_name
+        }))
+      end
+      token_usage
+    end
+    def validate_messages!(messages)
+      unless messages.is_a?(Array)
+        raise ArgumentError, "messages must be an array"
+      end
+      valid_roles = %w[system user assistant]
+      messages.each do |message|
+        unless message.is_a?(Hash) && message.key?(:role) && message.key?(:content)
+          raise ArgumentError, "Each message must have :role and :content"
+        end
+        unless valid_roles.include?(message[:role])
+          raise ArgumentError, "Invalid role: #{message[:role]}. Must be one of: #{valid_roles.join(', ')}"
+        end
+      end
+    end
+    def execute_raw_chat(messages, &streaming_block)
+      response = instrument_lm_request(messages, 'RawPrompt') do
+        # Direct adapter call, no strategies or JSON parsing
+        adapter.chat(messages: messages, signature: nil, &streaming_block)
+      end
+      # Return raw response content, not parsed JSON
+      response.content
+    end
   end
 end

data/lib/dspy/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module DSPy
-  VERSION = "0.11.0"
+  VERSION = "0.13.0"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: dspy
 version: !ruby/object:Gem::Version
-  version: 0.11.0
+  version: 0.13.0
 platform: ruby
 authors:
 - Vicente Reig Rincón de Arellano
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2025-07-21 00:00:00.000000000 Z
+date: 2025-07-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dry-configurable
@@ -177,6 +177,7 @@ files:
 - lib/dspy/lm/adapters/openai_adapter.rb
 - lib/dspy/lm/cache_manager.rb
 - lib/dspy/lm/errors.rb
+- lib/dspy/lm/message_builder.rb
 - lib/dspy/lm/response.rb
 - lib/dspy/lm/retry_handler.rb
 - lib/dspy/lm/strategies/anthropic_extraction_strategy.rb
@@ -185,6 +186,7 @@ files:
 - lib/dspy/lm/strategies/openai_structured_output_strategy.rb
 - lib/dspy/lm/strategy_selector.rb
 - lib/dspy/lm/structured_output_strategy.rb
+- lib/dspy/lm/usage.rb
 - lib/dspy/memory.rb
 - lib/dspy/memory/embedding_engine.rb
 - lib/dspy/memory/in_memory_store.rb