dspy 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,22 +1,162 @@
+ # typed: strict
  # frozen_string_literal: true

+ require 'sorbet-runtime'
+ require_relative 'predict'
+ require_relative 'signature'
+ require_relative 'instrumentation'
+
  module DSPy
    # Enhances prediction by encouraging step-by-step reasoning
-   # before providing a final answer.
+   # before providing a final answer using Sorbet signatures.
    class ChainOfThought < Predict
+     extend T::Sig
+
+     FieldDescriptor = DSPy::Signature::FieldDescriptor

+     sig { params(signature_class: T.class_of(DSPy::Signature)).void }
      def initialize(signature_class)
-       @signature_class = signature_class
-       chain_of_thought_schema = Dry::Schema.JSON do
-         required(:reasoning).
-           value(:string).
-           meta(description: "Reasoning: Let's think step by step in order to #{signature_class.description}")
+       @original_signature = signature_class
+
+       # Create enhanced output struct with reasoning
+       enhanced_output_struct = create_enhanced_output_struct(signature_class)
+
+       # Create enhanced signature class
+       enhanced_signature = Class.new(DSPy::Signature) do
+         # Set the description
+         description "#{signature_class.description} Think step by step."
+
+         # Use the same input struct and copy field descriptors
+         @input_struct_class = signature_class.input_struct_class
+         @input_field_descriptors = signature_class.instance_variable_get(:@input_field_descriptors) || {}
+
+         # Use the enhanced output struct and create field descriptors for it
+         @output_struct_class = enhanced_output_struct
+
+         # Create field descriptors for the enhanced output struct
+         @output_field_descriptors = {}
+
+         # Copy original output field descriptors
+         original_output_descriptors = signature_class.instance_variable_get(:@output_field_descriptors) || {}
+         @output_field_descriptors.merge!(original_output_descriptors)
+
+         # Add reasoning field descriptor (ChainOfThought always provides this)
+         @output_field_descriptors[:reasoning] = FieldDescriptor.new(String, "Step by step reasoning process")
+
+         class << self
+           attr_reader :input_struct_class, :output_struct_class
+         end
+       end
+
+       # Call parent constructor with enhanced signature
+       super(enhanced_signature)
+       @signature_class = enhanced_signature
+     end
+
+     # Override forward_untyped to add ChainOfThought-specific instrumentation
+     sig { override.params(input_values: T.untyped).returns(T.untyped) }
+     def forward_untyped(**input_values)
+       # Prepare instrumentation payload
+       input_fields = input_values.keys.map(&:to_s)
+
+       # Instrument ChainOfThought lifecycle
+       result = Instrumentation.instrument('dspy.chain_of_thought', {
+         signature_class: @original_signature.name,
+         model: lm.model,
+         provider: lm.provider,
+         input_fields: input_fields
+       }) do
+         # Call parent prediction logic
+         prediction_result = super(**input_values)
+
+         # Analyze reasoning if present
+         if prediction_result.respond_to?(:reasoning) && prediction_result.reasoning
+           reasoning_content = prediction_result.reasoning.to_s
+           reasoning_length = reasoning_content.length
+           reasoning_steps = count_reasoning_steps(reasoning_content)
+
+           # Emit reasoning analysis event
+           Instrumentation.emit('dspy.chain_of_thought.reasoning_complete', {
+             signature_class: @original_signature.name,
+             reasoning_steps: reasoning_steps,
+             reasoning_length: reasoning_length,
+             has_reasoning: !reasoning_content.empty?
+           })
+         end
+
+         prediction_result
+       end
+
+       result
+     end
+
+     private
+
+     # Count reasoning steps by looking for step indicators
+     def count_reasoning_steps(reasoning_text)
+       return 0 if reasoning_text.nil? || reasoning_text.empty?
+
+       # Look for common step patterns
+       step_patterns = [
+         /step \d+/i,
+         /\d+\./,
+         /first|second|third|then|next|finally/i,
+         /\n\s*-/
+       ]
+
+       max_count = 0
+       step_patterns.each do |pattern|
+         count = reasoning_text.scan(pattern).length
+         max_count = [max_count, count].max
+       end
+
+       # Fallback: count sentences if no clear steps
+       max_count > 0 ? max_count : reasoning_text.split(/[.!?]+/).reject(&:empty?).length
+     end
+
+     sig { params(signature_class: T.class_of(DSPy::Signature)).returns(T.class_of(T::Struct)) }
+     def create_enhanced_output_struct(signature_class)
+       # Get original output props
+       original_props = signature_class.output_struct_class.props
+
+       # Create new struct class with reasoning added
+       Class.new(T::Struct) do
+         # Add all original fields
+         original_props.each do |name, prop|
+           # Extract the type and other options
+           type = prop[:type]
+           options = prop.except(:type, :type_object, :accessor_key, :sensitivity, :redaction)
+
+           # Handle default values
+           if options[:default]
+             const name, type, default: options[:default]
+           elsif options[:factory]
+             const name, type, factory: options[:factory]
+           else
+             const name, type
+           end
+         end
+
+         # Add reasoning field (ChainOfThought always provides this)
+         const :reasoning, String
+
+         # Add to_h method to serialize the struct to a hash
+         define_method :to_h do
+           hash = {}
+
+           # Start with input values if available
+           if self.instance_variable_defined?(:@input_values)
+             hash.merge!(self.instance_variable_get(:@input_values))
+           end
+
+           # Then add output properties
+           self.class.props.keys.each do |key|
+             hash[key] = self.send(key)
+           end
+
+           hash
+         end
        end
-       @signature_class.output_schema = Dry::Schema.JSON(parent:
-         [
-           @signature_class.output_schema,
-           chain_of_thought_schema
-         ])
      end
    end
  end
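
In practice the rewrite above means ChainOfThought no longer patches a Dry::Schema onto the signature; it wraps the original signature in an enhanced one whose output struct carries a reasoning field, and instruments the call. A minimal usage sketch (the QA signature, the configured language model, and the call entry point inherited from Predict are assumptions about the surrounding API, not part of this diff):

    # QA is a hypothetical DSPy::Signature subclass defined elsewhere.
    cot = DSPy::ChainOfThought.new(QA)
    result = cot.call(question: "What is 2 + 2?")

    result.reasoning   # step-by-step text from the extra :reasoning field
    result.to_h        # inputs merged with outputs, including :reasoning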

@@ -0,0 +1,54 @@
+ # frozen_string_literal: true
+
+ module DSPy
+   module Instrumentation
+     # Utility for extracting token usage from different LM adapters
+     # Uses actual token counts from API responses for accuracy
+     module TokenTracker
+       extend self
+
+       # Extract actual token usage from API responses
+       def extract_token_usage(response, provider)
+         case provider.to_s.downcase
+         when 'openai'
+           extract_openai_tokens(response)
+         when 'anthropic'
+           extract_anthropic_tokens(response)
+         else
+           {} # No token information for other providers
+         end
+       end
+
+       private
+
+       def extract_openai_tokens(response)
+         return {} unless response&.usage
+
+         usage = response.usage
+         return {} unless usage.is_a?(Hash)
+
+         {
+           tokens_input: usage[:prompt_tokens] || usage['prompt_tokens'],
+           tokens_output: usage[:completion_tokens] || usage['completion_tokens'],
+           tokens_total: usage[:total_tokens] || usage['total_tokens']
+         }
+       end
+
+       def extract_anthropic_tokens(response)
+         return {} unless response&.usage
+
+         usage = response.usage
+         return {} unless usage.is_a?(Hash)
+
+         input_tokens = usage[:input_tokens] || usage['input_tokens'] || 0
+         output_tokens = usage[:output_tokens] || usage['output_tokens'] || 0
+
+         {
+           tokens_input: input_tokens,
+           tokens_output: output_tokens,
+           tokens_total: input_tokens + output_tokens
+         }
+       end
+     end
+   end
+ end
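
TokenTracker only needs a response object exposing a usage hash with the provider's native token fields. A minimal sketch (FakeResponse is a stand-in for a real API response, not part of the gem):

    FakeResponse = Struct.new(:usage)

    openai_like = FakeResponse.new({ prompt_tokens: 120, completion_tokens: 45, total_tokens: 165 })
    DSPy::Instrumentation::TokenTracker.extract_token_usage(openai_like, 'openai')
    # => { tokens_input: 120, tokens_output: 45, tokens_total: 165 }

    # Providers other than openai/anthropic fall through to an empty hash.
    DSPy::Instrumentation::TokenTracker.extract_token_usage(openai_like, 'ollama')
    # => {}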

@@ -0,0 +1,100 @@
+ # frozen_string_literal: true
+
+ require 'dry-monitor'
+ require 'dry-configurable'
+
+ module DSPy
+   # Core instrumentation module using dry-monitor for event emission
+   # Provides extension points for logging, Langfuse, New Relic, and custom monitoring
+   module Instrumentation
+
+     def self.notifications
+       @notifications ||= Dry::Monitor::Notifications.new(:dspy).tap do |n|
+         # Register all DSPy events
+         n.register_event('dspy.lm.request')
+         n.register_event('dspy.lm.tokens')
+         n.register_event('dspy.lm.response.parsed')
+         n.register_event('dspy.predict')
+         n.register_event('dspy.predict.validation_error')
+         n.register_event('dspy.chain_of_thought')
+         n.register_event('dspy.chain_of_thought.reasoning_step')
+         n.register_event('dspy.react')
+         n.register_event('dspy.react.tool_call')
+         n.register_event('dspy.react.iteration_complete')
+         n.register_event('dspy.react.max_iterations')
+       end
+     end
+
+     # High-precision timing for performance tracking
+     def self.instrument(event_name, payload = {}, &block)
+       # If no block is given, return early
+       return unless block_given?
+
+       start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+       start_cpu = Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID)
+
+       begin
+         result = yield
+
+         end_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+         end_cpu = Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID)
+
+         enhanced_payload = payload.merge(
+           duration_ms: ((end_time - start_time) * 1000).round(2),
+           cpu_time_ms: ((end_cpu - start_cpu) * 1000).round(2),
+           status: 'success',
+           timestamp: Time.now.iso8601
+         )
+
+         self.emit_event(event_name, enhanced_payload)
+         result
+       rescue => error
+         end_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+         end_cpu = Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID)
+
+         error_payload = payload.merge(
+           duration_ms: ((end_time - start_time) * 1000).round(2),
+           cpu_time_ms: ((end_cpu - start_cpu) * 1000).round(2),
+           status: 'error',
+           error_type: error.class.name,
+           error_message: error.message,
+           timestamp: Time.now.iso8601
+         )
+
+         self.emit_event(event_name, error_payload)
+         raise
+       end
+     end
+
+     # Emit event without timing (for discrete events)
+     def self.emit(event_name, payload = {})
+       enhanced_payload = payload.merge(
+         timestamp: Time.now.iso8601,
+         status: payload[:status] || 'success'
+       )
+
+       self.emit_event(event_name, enhanced_payload)
+     end
+
+     # Register additional events dynamically (useful for testing)
+     def self.register_event(event_name)
+       notifications.register_event(event_name)
+     end
+
+     # Subscribe to DSPy instrumentation events
+     def self.subscribe(event_pattern = nil, &block)
+       if event_pattern
+         notifications.subscribe(event_pattern, &block)
+       else
+         # Subscribe to all DSPy events
+         %w[dspy.lm.request dspy.lm.tokens dspy.lm.response.parsed dspy.predict dspy.predict.validation_error dspy.chain_of_thought dspy.chain_of_thought.reasoning_step dspy.react dspy.react.tool_call dspy.react.iteration_complete dspy.react.max_iterations].each do |event_name|
+           notifications.subscribe(event_name, &block)
+         end
+       end
+     end
+
+     def self.emit_event(event_name, payload)
+       notifications.instrument(event_name, payload)
+     end
+   end
+ end
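
A sketch of how a consumer might hook into these events. The event object yielded to the block comes from dry-monitor's notifications, and the logging shown is illustrative rather than part of the gem:

    # Listen for a single event by name...
    DSPy::Instrumentation.subscribe('dspy.lm.request') do |event|
      puts "#{event.id}: #{event.payload[:status]} in #{event.payload[:duration_ms]}ms"
    end

    # ...time an arbitrary block (duration_ms, cpu_time_ms and status get merged in)...
    DSPy::Instrumentation.instrument('dspy.predict', { signature_class: 'QA' }) do
      # work to be timed
    end

    # ...or emit a discrete, untimed event.
    DSPy::Instrumentation.emit('dspy.predict.validation_error', { signature_class: 'QA' })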

@@ -0,0 +1,41 @@
+ # frozen_string_literal: true
+
+ module DSPy
+   class LM
+     # Base adapter interface for all LM providers
+     class Adapter
+       attr_reader :model, :api_key
+
+       def initialize(model:, api_key:)
+         @model = model
+         @api_key = api_key
+         validate_configuration!
+       end
+
+       # Chat interface that all adapters must implement
+       # @param messages [Array<Hash>] Array of message hashes with :role and :content
+       # @param block [Proc] Optional streaming block
+       # @return [DSPy::LM::Response] Normalized response
+       def chat(messages:, &block)
+         raise NotImplementedError, "Subclasses must implement #chat method"
+       end
+
+       private
+
+       def validate_configuration!
+         raise ConfigurationError, "Model is required" if model.nil? || model.empty?
+         raise ConfigurationError, "API key is required" if api_key.nil? || api_key.empty?
+       end
+
+       # Helper method to normalize message format
+       def normalize_messages(messages)
+         messages.map do |msg|
+           {
+             role: msg[:role].to_s,
+             content: msg[:content].to_s
+           }
+         end
+       end
+     end
+   end
+ end
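
Since chat is the only method a subclass must supply, a provider integration can be as small as the sketch below (EchoAdapter is a made-up example, not shipped with the gem):

    class EchoAdapter < DSPy::LM::Adapter
      def chat(messages:, &block)
        # Echo the last message back as a normalized Response.
        last = normalize_messages(messages).last
        DSPy::LM::Response.new(
          content: last[:content],
          metadata: { provider: 'echo', model: model }
        )
      end
    end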

@@ -0,0 +1,59 @@
+ # frozen_string_literal: true
+
+ module DSPy
+   class LM
+     # Factory for creating appropriate adapters based on model_id
+     class AdapterFactory
+       # Maps provider prefixes to adapter classes
+       ADAPTER_MAP = {
+         'openai' => 'OpenAIAdapter',
+         'anthropic' => 'AnthropicAdapter',
+         'ruby_llm' => 'RubyLLMAdapter'
+       }.freeze
+
+       class << self
+         # Creates an adapter instance based on model_id
+         # @param model_id [String] Full model identifier (e.g., "openai/gpt-4")
+         # @param api_key [String] API key for the provider
+         # @return [DSPy::LM::Adapter] Appropriate adapter instance
+         def create(model_id, api_key:)
+           provider, model = parse_model_id(model_id)
+           adapter_class = get_adapter_class(provider)
+
+           adapter_class.new(model: model, api_key: api_key)
+         end
+
+         private
+
+         # Parse model_id to determine provider and model
+         def parse_model_id(model_id)
+           if model_id.include?('/')
+             provider, model = model_id.split('/', 2)
+             [provider, model]
+           else
+             # Legacy format: assume ruby_llm for backward compatibility
+             ['ruby_llm', model_id]
+           end
+         end
+
+         def get_adapter_class(provider)
+           adapter_class_name = ADAPTER_MAP[provider]
+
+           unless adapter_class_name
+             available_providers = ADAPTER_MAP.keys.join(', ')
+             raise UnsupportedProviderError,
+                   "Unsupported provider: #{provider}. Available: #{available_providers}"
+           end
+
+           begin
+             Object.const_get("DSPy::LM::#{adapter_class_name}")
+           rescue NameError
+             raise UnsupportedProviderError,
+                   "Adapter not found: DSPy::LM::#{adapter_class_name}. " \
+                   "Make sure the corresponding gem is installed."
+           end
+         end
+       end
+     end
+   end
+ end
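
Illustrative calls, with placeholder API keys. The provider prefix before the slash selects the adapter; a bare model id keeps the pre-0.3.0 ruby_llm behaviour:

    DSPy::LM::AdapterFactory.create('openai/gpt-4', api_key: ENV['OPENAI_API_KEY'])
    # => DSPy::LM::OpenAIAdapter for model "gpt-4"

    DSPy::LM::AdapterFactory.create('anthropic/claude-3-haiku', api_key: ENV['ANTHROPIC_API_KEY'])
    # => DSPy::LM::AnthropicAdapter for model "claude-3-haiku"

    DSPy::LM::AdapterFactory.create('gpt-4', api_key: ENV['OPENAI_API_KEY'])
    # no "provider/" prefix, so the legacy path falls back to RubyLLMAdapter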

@@ -0,0 +1,96 @@
+ # frozen_string_literal: true
+
+ require 'anthropic'
+
+ module DSPy
+   class LM
+     class AnthropicAdapter < Adapter
+       def initialize(model:, api_key:)
+         super
+         @client = Anthropic::Client.new(api_key: api_key)
+       end
+
+       def chat(messages:, &block)
+         # Anthropic requires system message to be separate from messages
+         system_message, user_messages = extract_system_message(normalize_messages(messages))
+
+         request_params = {
+           model: model,
+           messages: user_messages,
+           max_tokens: 4096, # Required for Anthropic
+           temperature: 0.0 # DSPy default for deterministic responses
+         }
+
+         # Add system message if present
+         request_params[:system] = system_message if system_message
+
+         # Add streaming if block provided
+         if block_given?
+           request_params[:stream] = true
+         end
+
+         begin
+           if block_given?
+             content = ""
+             @client.messages.stream(**request_params) do |chunk|
+               if chunk.respond_to?(:delta) && chunk.delta.respond_to?(:text)
+                 chunk_text = chunk.delta.text
+                 content += chunk_text
+                 block.call(chunk)
+               end
+             end
+
+             Response.new(
+               content: content,
+               usage: nil, # Usage not available in streaming
+               metadata: {
+                 provider: 'anthropic',
+                 model: model,
+                 streaming: true
+               }
+             )
+           else
+             response = @client.messages.create(**request_params)
+
+             if response.respond_to?(:error) && response.error
+               raise AdapterError, "Anthropic API error: #{response.error}"
+             end
+
+             content = response.content.first.text if response.content.is_a?(Array) && response.content.first
+             usage = response.usage
+
+             Response.new(
+               content: content,
+               usage: usage.respond_to?(:to_h) ? usage.to_h : usage,
+               metadata: {
+                 provider: 'anthropic',
+                 model: model,
+                 response_id: response.id,
+                 role: response.role
+               }
+             )
+           end
+         rescue => e
+           raise AdapterError, "Anthropic adapter error: #{e.message}"
+         end
+       end
+
+       private
+
+       def extract_system_message(messages)
+         system_message = nil
+         user_messages = []
+
+         messages.each do |msg|
+           if msg[:role] == 'system'
+             system_message = msg[:content]
+           else
+             user_messages << msg
+           end
+         end
+
+         [system_message, user_messages]
+       end
+     end
+   end
+ end
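
A direct adapter call looks like the sketch below; normally the higher-level DSPy::LM drives this, and the model id and expected output are only examples:

    adapter = DSPy::LM::AnthropicAdapter.new(
      model: 'claude-3-5-sonnet-20241022',
      api_key: ENV['ANTHROPIC_API_KEY']
    )
    response = adapter.chat(messages: [
      { role: 'system', content: 'Answer in one word.' },
      { role: 'user', content: 'What is the capital of France?' }
    ])
    response.content  # => "Paris" (the system prompt is passed separately, per the Anthropic API)
    response.usage    # input/output token counts straight from the API response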

@@ -0,0 +1,53 @@
+ # frozen_string_literal: true
+
+ require 'openai'
+
+ module DSPy
+   class LM
+     class OpenAIAdapter < Adapter
+       def initialize(model:, api_key:)
+         super
+         @client = OpenAI::Client.new(api_key: api_key)
+       end
+
+       def chat(messages:, &block)
+         request_params = {
+           model: model,
+           messages: normalize_messages(messages),
+           temperature: 0.0 # DSPy default for deterministic responses
+         }
+
+         # Add streaming if block provided
+         if block_given?
+           request_params[:stream] = proc do |chunk, _bytesize|
+             block.call(chunk) if chunk.dig("choices", 0, "delta", "content")
+           end
+         end
+
+         begin
+           response = @client.chat.completions.create(**request_params)
+
+           if response.respond_to?(:error) && response.error
+             raise AdapterError, "OpenAI API error: #{response.error}"
+           end
+
+           content = response.choices.first.message.content
+           usage = response.usage
+
+           Response.new(
+             content: content,
+             usage: usage.respond_to?(:to_h) ? usage.to_h : usage,
+             metadata: {
+               provider: 'openai',
+               model: model,
+               response_id: response.id,
+               created: response.created
+             }
+           )
+         rescue => e
+           raise AdapterError, "OpenAI adapter error: #{e.message}"
+         end
+       end
+     end
+   end
+ end

@@ -0,0 +1,81 @@
+ # frozen_string_literal: true
+
+ begin
+   require 'ruby_llm'
+ rescue LoadError
+   # ruby_llm is optional for backward compatibility
+ end
+
+ module DSPy
+   class LM
+     class RubyLLMAdapter < Adapter
+       def initialize(model:, api_key:)
+         super
+
+         unless defined?(RubyLLM)
+           raise ConfigurationError,
+                 "ruby_llm gem is required for RubyLLMAdapter. " \
+                 "Add 'gem \"ruby_llm\"' to your Gemfile."
+         end
+
+         configure_ruby_llm
+       end
+
+       def chat(messages:, &block)
+         begin
+           chat = RubyLLM.chat(model: model)
+
+           # Add messages to chat
+           messages.each do |msg|
+             chat.add_message(role: msg[:role].to_sym, content: msg[:content])
+           end
+
+           # Get the last user message for ask method
+           last_user_message = messages.reverse.find { |msg| msg[:role] == 'user' }
+
+           if last_user_message
+             # Remove the last user message since ask() will add it
+             chat.messages.pop if chat.messages.last&.content == last_user_message[:content]
+             chat.ask(last_user_message[:content], &block)
+           else
+             raise AdapterError, "No user message found in conversation"
+           end
+
+           content = chat.messages.last&.content || ""
+
+           Response.new(
+             content: content,
+             usage: nil, # ruby_llm doesn't provide usage info
+             metadata: {
+               provider: 'ruby_llm',
+               model: model,
+               message_count: chat.messages.length
+             }
+           )
+         rescue => e
+           raise AdapterError, "RubyLLM adapter error: #{e.message}"
+         end
+       end
+
+       private
+
+       def configure_ruby_llm
+         # Determine provider from model for configuration
+         if model.include?('gpt') || model.include?('openai')
+           RubyLLM.configure do |config|
+             config.openai_api_key = api_key
+           end
+         elsif model.include?('claude') || model.include?('anthropic')
+           RubyLLM.configure do |config|
+             config.anthropic_api_key = api_key
+           end
+         else
+           # Default to OpenAI configuration
+           RubyLLM.configure do |config|
+             config.openai_api_key = api_key
+           end
+         end
+       end
+     end
+   end
+ end

@@ -0,0 +1,10 @@
+ # frozen_string_literal: true
+
+ module DSPy
+   class LM
+     class Error < StandardError; end
+     class AdapterError < Error; end
+     class UnsupportedProviderError < Error; end
+     class ConfigurationError < Error; end
+   end
+ end

@@ -0,0 +1,28 @@
+ # frozen_string_literal: true
+
+ module DSPy
+   class LM
+     # Normalized response format for all LM providers
+     class Response
+       attr_reader :content, :usage, :metadata
+
+       def initialize(content:, usage: nil, metadata: {})
+         @content = content
+         @usage = usage
+         @metadata = metadata
+       end
+
+       def to_s
+         content
+       end
+
+       def to_h
+         {
+           content: content,
+           usage: usage,
+           metadata: metadata
+         }
+       end
+     end
+   end
+ end
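
Response is the one shape every adapter returns, so downstream response parsing and token tracking only ever deal with content, usage and metadata. A tiny sketch:

    r = DSPy::LM::Response.new(
      content: '{"answer":"4"}',
      usage: { prompt_tokens: 12, completion_tokens: 3, total_tokens: 15 },
      metadata: { provider: 'openai', model: 'gpt-4' }
    )
    r.to_s  # => '{"answer":"4"}'
    r.to_h  # => { content: '{"answer":"4"}', usage: {...}, metadata: {...} }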