dspy 0.3.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,203 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sorbet-runtime'
4
+ require_relative 'signature'
5
+
6
+ module DSPy
7
# A typed training/evaluation example bound to a DSPy Signature.
#
# The +input+ and +expected+ hashes given to the constructor are converted into
# instances of the signature's input/output struct classes, so missing fields
# or wrongly typed values are reported when the example is built rather than
# later during evaluation.
class Example
  extend T::Sig

  # Signature class this example was validated against.
  sig { returns(T.class_of(Signature)) }
  attr_reader :signature_class

  # Input struct built from the +input+ hash.
  sig { returns(T::Struct) }
  attr_reader :input

  # Expected-output struct built from the +expected+ hash.
  sig { returns(T::Struct) }
  attr_reader :expected

  # Optional identifier of the example.
  sig { returns(T.nilable(String)) }
  attr_reader :id

  # Optional frozen metadata hash.
  sig { returns(T.nilable(T::Hash[Symbol, T.untyped])) }
  attr_reader :metadata

  # @param signature_class signature whose struct classes validate the data
  # @param input field values for +signature_class.input_struct_class+
  # @param expected field values for +signature_class.output_struct_class+
  # @param id optional identifier
  # @param metadata optional extra data; a frozen shallow copy is stored so the
  #   caller's hash is left untouched
  # @raise [ArgumentError] when a struct rejects the given fields
  # @raise [TypeError] when a struct rejects the type of a field value
  sig do
    params(
      signature_class: T.class_of(Signature),
      input: T::Hash[Symbol, T.untyped],
      expected: T::Hash[Symbol, T.untyped],
      id: T.nilable(String),
      metadata: T.nilable(T::Hash[Symbol, T.untyped])
    ).void
  end
  def initialize(signature_class:, input:, expected:, id: nil, metadata: nil)
    @signature_class = signature_class
    @id = id
    # Freeze a copy: freezing the argument itself would mutate the caller's hash.
    @metadata = metadata ? metadata.dup.freeze : nil

    @input = build_struct(signature_class.input_struct_class, input, 'input')
    @expected = build_struct(signature_class.output_struct_class, expected, 'expected output')
  end

  # Input struct as a plain hash (prop name => value), for program execution.
  sig { returns(T::Hash[Symbol, T.untyped]) }
  def input_values
    struct_values(@input)
  end

  # Expected struct as a plain hash (prop name => value), for comparison.
  sig { returns(T::Hash[Symbol, T.untyped]) }
  def expected_values
    struct_values(@expected)
  end

  # Checks whether every expected field equals the corresponding field of
  # +prediction+.
  #
  # @param prediction a Hash (symbol or string keys) or any object exposing the
  #   expected fields as reader methods; a falsy value never matches
  # @return [Boolean]
  sig { params(prediction: T.untyped).returns(T::Boolean) }
  def matches_prediction?(prediction)
    return false unless prediction

    @expected.class.props.keys.all? do |key|
      @expected.public_send(key) == prediction_value(prediction, key)
    end
  end

  # Serializes the example for persistence and debugging. +:id+ and +:metadata+
  # are only present when they were set.
  sig { returns(T::Hash[Symbol, T.untyped]) }
  def to_h
    result = {
      signature_class: @signature_class.name,
      input: input_values,
      expected: expected_values
    }

    result[:id] = @id if @id
    result[:metadata] = @metadata if @metadata
    result
  end

  # Rebuilds an Example from the hash shape produced by #to_h.
  #
  # @param hash symbol-keyed hash with :signature_class (class name) and
  #   optional :input, :expected, :id, :metadata
  # @param signature_registry optional name => class lookup consulted before
  #   falling back to constant resolution
  sig do
    params(
      hash: T::Hash[Symbol, T.untyped],
      signature_registry: T.nilable(T::Hash[String, T.class_of(Signature)])
    ).returns(Example)
  end
  def self.from_h(hash, signature_registry: nil)
    signature_class_name = hash[:signature_class]

    # NOTE(review): Object.const_get resolves any constant name found in the
    # hash; pass a signature_registry when the hash comes from an untrusted source.
    signature_class = (signature_registry && signature_registry[signature_class_name]) ||
                      Object.const_get(signature_class_name)

    new(
      signature_class: signature_class,
      input: hash[:input] || {},
      expected: hash[:expected] || {},
      id: hash[:id],
      metadata: hash[:metadata]
    )
  end

  # Builds one Example per entry of +examples_data+, collecting every failure
  # before raising so that all invalid entries are reported together.
  #
  # @param signature_class signature used to validate each entry
  # @param examples_data hashes with :input and :expected keys (optional :id;
  #   defaults to "example_<index>")
  # @raise [ArgumentError] listing each failing entry, when any entry is invalid
  sig do
    params(
      signature_class: T.class_of(Signature),
      examples_data: T::Array[T::Hash[Symbol, T.untyped]]
    ).returns(T::Array[Example])
  end
  def self.validate_batch(signature_class, examples_data)
    errors = []
    examples = []

    examples_data.each_with_index do |example_data, index|
      begin
        # Only the structured format with :input and :expected keys is accepted.
        unless example_data.key?(:input) && example_data.key?(:expected)
          raise ArgumentError, "Example must have :input and :expected keys. Legacy flat format is no longer supported."
        end

        examples << new(
          signature_class: signature_class,
          input: example_data[:input],
          expected: example_data[:expected],
          id: example_data[:id] || "example_#{index}"
        )
      rescue => e
        errors << "Example #{index}: #{e.message}"
      end
    end

    raise ArgumentError, "Validation errors:\n#{errors.join("\n")}" unless errors.empty?

    examples
  end

  # Two examples are equal when signature class, input values and expected
  # values are equal; id and metadata are not compared.
  sig { params(other: T.untyped).returns(T::Boolean) }
  def ==(other)
    return false unless other.is_a?(Example)

    @signature_class == other.signature_class &&
      input_values == other.input_values &&
      expected_values == other.expected_values
  end

  # String representation for debugging.
  sig { returns(String) }
  def to_s
    "DSPy::Example(#{@signature_class.name}) input=#{input_values} expected=#{expected_values}"
  end

  sig { returns(String) }
  def inspect
    to_s
  end

  private

  # Instantiates +struct_class+ from +values+, prefixing struct errors with the
  # signature name and +label+ ("input" / "expected output").
  sig { params(struct_class: T.untyped, values: T::Hash[Symbol, T.untyped], label: String).returns(T.untyped) }
  def build_struct(struct_class, values, label)
    struct_class.new(**values)
  rescue ArgumentError => e
    raise ArgumentError, "Invalid #{label} for #{@signature_class.name}: #{e.message}"
  rescue TypeError => e
    raise TypeError, "Type error in #{label} for #{@signature_class.name}: #{e.message}"
  end

  # Reads every declared prop of +struct+ into a hash.
  sig { params(struct: T.untyped).returns(T::Hash[Symbol, T.untyped]) }
  def struct_values(struct)
    struct.class.props.keys.each_with_object({}) do |key, values|
      values[key] = struct.public_send(key)
    end
  end

  # Extracts the value for +key+ from a prediction. For hashes the symbol key
  # wins and the string key is only consulted when the symbol lookup is nil, so
  # a predicted +false+ is kept instead of being replaced by the fallback.
  sig { params(prediction: T.untyped, key: Symbol).returns(T.untyped) }
  def prediction_value(prediction, key)
    if prediction.is_a?(Hash)
      value = prediction[key]
      value.nil? ? prediction[key.to_s] : value
    elsif prediction.respond_to?(key)
      prediction.public_send(key)
    end
  end
end
203
+ end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
require 'json'
require 'sorbet-runtime'
4
+
5
+ module DSPy
6
# A single few-shot demonstration: an input hash, the matching output hash and
# optional free-text reasoning. It can be rendered as a prompt section and
# round-tripped through a plain hash.
class FewShotExample
  extend T::Sig

  # Frozen input hash.
  sig { returns(T::Hash[Symbol, T.untyped]) }
  attr_reader :input

  # Frozen output hash.
  sig { returns(T::Hash[Symbol, T.untyped]) }
  attr_reader :output

  # Optional reasoning text shown between input and output.
  sig { returns(T.nilable(String)) }
  attr_reader :reasoning

  # @param input demonstration input
  # @param output demonstration output
  # @param reasoning optional explanation
  sig do
    params(
      input: T::Hash[Symbol, T.untyped],
      output: T::Hash[Symbol, T.untyped],
      reasoning: T.nilable(String)
    ).void
  end
  def initialize(input:, output:, reasoning: nil)
    # Freeze shallow copies: freezing the arguments themselves would make the
    # caller's own hashes immutable as a side effect.
    @input = input.dup.freeze
    @output = output.dup.freeze
    @reasoning = reasoning
  end

  # Renders the example as newline-joined "## Input" / "## Reasoning" /
  # "## Output" sections, with both hashes pretty-printed inside ```json fences.
  # The reasoning section is omitted when no reasoning was given.
  sig { returns(String) }
  def to_prompt_section
    sections = ['## Input', '```json', JSON.pretty_generate(@input), '```']
    sections.push('## Reasoning', @reasoning) if @reasoning
    sections.push('## Output', '```json', JSON.pretty_generate(@output), '```')

    sections.join("\n")
  end

  # Hash form of the example; +:reasoning+ is only present when set.
  sig { returns(T::Hash[Symbol, T.untyped]) }
  def to_h
    result = {
      input: @input,
      output: @output
    }
    result[:reasoning] = @reasoning if @reasoning
    result
  end

  # Builds an example from the hash shape produced by #to_h; missing :input or
  # :output default to an empty hash.
  sig { params(hash: T::Hash[Symbol, T.untyped]).returns(FewShotExample) }
  def self.from_h(hash)
    new(
      input: hash[:input] || {},
      output: hash[:output] || {},
      reasoning: hash[:reasoning]
    )
  end

  # Equal when input, output and reasoning are all equal.
  sig { params(other: T.untyped).returns(T::Boolean) }
  def ==(other)
    return false unless other.is_a?(FewShotExample)

    @input == other.input &&
      @output == other.output &&
      @reasoning == other.reasoning
  end
end
81
+ end
@@ -28,9 +28,9 @@ module DSPy
28
28
  return {} unless usage.is_a?(Hash)
29
29
 
30
30
  {
31
- tokens_input: usage[:prompt_tokens] || usage['prompt_tokens'],
32
- tokens_output: usage[:completion_tokens] || usage['completion_tokens'],
33
- tokens_total: usage[:total_tokens] || usage['total_tokens']
31
+ input_tokens: usage[:prompt_tokens] || usage['prompt_tokens'],
32
+ output_tokens: usage[:completion_tokens] || usage['completion_tokens'],
33
+ total_tokens: usage[:total_tokens] || usage['total_tokens']
34
34
  }
35
35
  end
36
36
 
@@ -44,9 +44,9 @@ module DSPy
44
44
  output_tokens = usage[:output_tokens] || usage['output_tokens'] || 0
45
45
 
46
46
  {
47
- tokens_input: input_tokens,
48
- tokens_output: output_tokens,
49
- tokens_total: input_tokens + output_tokens
47
+ input_tokens: input_tokens,
48
+ output_tokens: output_tokens,
49
+ total_tokens: input_tokens + output_tokens
50
50
  }
51
51
  end
52
52
  end
@@ -2,17 +2,34 @@
2
2
 
3
3
  require 'dry-monitor'
4
4
  require 'dry-configurable'
5
+ require 'time'
5
6
 
6
7
  module DSPy
7
8
  # Core instrumentation module using dry-monitor for event emission
8
- # Provides extension points for logging, Langfuse, New Relic, and custom monitoring
9
+ # Provides extension points for logging, OpenTelemetry, New Relic, Langfuse, and custom monitoring
9
10
  module Instrumentation
10
- # Get the current logger subscriber instance (lazy initialization)
11
- def self.logger_subscriber
12
- @logger_subscriber ||= begin
13
- require_relative 'subscribers/logger_subscriber'
14
- DSPy::Subscribers::LoggerSubscriber.new
15
- end
11
# Builds a new logger subscriber on every call (nothing is memoized).
# The subscriber file is loaded on first use.
#
# @param subscriber_options keyword options forwarded unchanged to
#   DSPy::Subscribers::LoggerSubscriber.new
def self.logger_subscriber(**subscriber_options)
  require_relative 'subscribers/logger_subscriber'

  subscriber_class = DSPy::Subscribers::LoggerSubscriber
  subscriber_class.new(**subscriber_options)
end
16
+
17
# Builds a new OpenTelemetry subscriber on every call (nothing is memoized).
# The subscriber file is loaded on first use.
#
# @param subscriber_options keyword options forwarded unchanged to
#   DSPy::Subscribers::OtelSubscriber.new
def self.otel_subscriber(**subscriber_options)
  require_relative 'subscribers/otel_subscriber'

  subscriber_class = DSPy::Subscribers::OtelSubscriber
  subscriber_class.new(**subscriber_options)
end
22
+
23
# Builds a new New Relic subscriber on every call (nothing is memoized).
# The subscriber file is loaded on first use.
#
# @param subscriber_options keyword options forwarded unchanged to
#   DSPy::Subscribers::NewrelicSubscriber.new
def self.newrelic_subscriber(**subscriber_options)
  require_relative 'subscribers/newrelic_subscriber'

  subscriber_class = DSPy::Subscribers::NewrelicSubscriber
  subscriber_class.new(**subscriber_options)
end
28
+
29
# Builds a new Langfuse subscriber on every call (nothing is memoized).
# The subscriber file is loaded on first use.
#
# @param subscriber_options keyword options forwarded unchanged to
#   DSPy::Subscribers::LangfuseSubscriber.new
def self.langfuse_subscriber(**subscriber_options)
  require_relative 'subscribers/langfuse_subscriber'

  subscriber_class = DSPy::Subscribers::LangfuseSubscriber
  subscriber_class.new(**subscriber_options)
end
17
34
 
18
35
  def self.notifications
@@ -29,6 +46,55 @@ module DSPy
29
46
  n.register_event('dspy.react.tool_call')
30
47
  n.register_event('dspy.react.iteration_complete')
31
48
  n.register_event('dspy.react.max_iterations')
49
+
50
+ # Evaluation events
51
+ n.register_event('dspy.evaluation.start')
52
+ n.register_event('dspy.evaluation.example')
53
+ n.register_event('dspy.evaluation.batch')
54
+ n.register_event('dspy.evaluation.batch_complete')
55
+
56
+ # Optimization events
57
+ n.register_event('dspy.optimization.start')
58
+ n.register_event('dspy.optimization.complete')
59
+ n.register_event('dspy.optimization.trial_start')
60
+ n.register_event('dspy.optimization.trial_complete')
61
+ n.register_event('dspy.optimization.bootstrap_start')
62
+ n.register_event('dspy.optimization.bootstrap_complete')
63
+ n.register_event('dspy.optimization.bootstrap_example')
64
+ n.register_event('dspy.optimization.minibatch_evaluation')
65
+ n.register_event('dspy.optimization.instruction_proposal_start')
66
+ n.register_event('dspy.optimization.instruction_proposal_complete')
67
+ n.register_event('dspy.optimization.error')
68
+ n.register_event('dspy.optimization.save')
69
+ n.register_event('dspy.optimization.load')
70
+
71
+ # Storage events
72
+ n.register_event('dspy.storage.save_start')
73
+ n.register_event('dspy.storage.save_complete')
74
+ n.register_event('dspy.storage.save_error')
75
+ n.register_event('dspy.storage.load_start')
76
+ n.register_event('dspy.storage.load_complete')
77
+ n.register_event('dspy.storage.load_error')
78
+ n.register_event('dspy.storage.delete')
79
+ n.register_event('dspy.storage.export')
80
+ n.register_event('dspy.storage.import')
81
+ n.register_event('dspy.storage.cleanup')
82
+
83
+ # Registry events
84
+ n.register_event('dspy.registry.register_start')
85
+ n.register_event('dspy.registry.register_complete')
86
+ n.register_event('dspy.registry.register_error')
87
+ n.register_event('dspy.registry.deploy_start')
88
+ n.register_event('dspy.registry.deploy_complete')
89
+ n.register_event('dspy.registry.deploy_error')
90
+ n.register_event('dspy.registry.rollback_start')
91
+ n.register_event('dspy.registry.rollback_complete')
92
+ n.register_event('dspy.registry.rollback_error')
93
+ n.register_event('dspy.registry.performance_update')
94
+ n.register_event('dspy.registry.export')
95
+ n.register_event('dspy.registry.import')
96
+ n.register_event('dspy.registry.auto_deployment')
97
+ n.register_event('dspy.registry.automatic_rollback')
32
98
  end
33
99
  end
34
100
 
@@ -49,9 +115,8 @@ module DSPy
49
115
  enhanced_payload = payload.merge(
50
116
  duration_ms: ((end_time - start_time) * 1000).round(2),
51
117
  cpu_time_ms: ((end_cpu - start_cpu) * 1000).round(2),
52
- status: 'success',
53
- timestamp: Time.now.iso8601
54
- )
118
+ status: 'success'
119
+ ).merge(generate_timestamp)
55
120
 
56
121
  self.emit_event(event_name, enhanced_payload)
57
122
  result
@@ -64,9 +129,8 @@ module DSPy
64
129
  cpu_time_ms: ((end_cpu - start_cpu) * 1000).round(2),
65
130
  status: 'error',
66
131
  error_type: error.class.name,
67
- error_message: error.message,
68
- timestamp: Time.now.iso8601
69
- )
132
+ error_message: error.message
133
+ ).merge(generate_timestamp)
70
134
 
71
135
  self.emit_event(event_name, error_payload)
72
136
  raise
@@ -75,10 +139,12 @@ module DSPy
75
139
 
76
140
# Emits a discrete (untimed) event.
#
# The payload is extended with a :status (kept when the payload already has
# one, otherwise 'success') and then with the hash returned by
# generate_timestamp, which therefore wins over a same-named payload key.
#
# @param event_name name of the event to emit
# @param payload event data; nil is treated as an empty hash
# @return the result of emit_event
def self.emit(event_name, payload = {})
  data = payload || {}
  with_status = data.merge(status: data[:status] || 'success')

  emit_event(event_name, with_status.merge(generate_timestamp))
end
@@ -101,13 +167,128 @@ module DSPy
101
167
  end
102
168
 
103
169
# Publishes +payload+ under +event_name+ on the notifications bus.
# No subscriber is created here; this method only instruments the event.
#
# @return the result of notifications.instrument
def self.emit_event(event_name, payload)
  bus = notifications
  bus.instrument(event_name, payload)
end
108
173
 
109
174
# Sets up every subscriber listed in DSPy.config.instrumentation.subscribers.
#
# Does nothing (returns nil) when instrumentation is not enabled. Otherwise the
# configuration is validated via DSPy.validate_instrumentation! before
# setup_subscriber is called once per configured subscriber type.
def self.setup_subscribers
  instrumentation = DSPy.config.instrumentation
  return unless instrumentation.enabled

  DSPy.validate_instrumentation!
  instrumentation.subscribers.each { |type| setup_subscriber(type) }
end
188
+
189
# Sets up the subscriber identified by +subscriber_type+.
#
# :logger is always set up; :otel, :newrelic and :langfuse are only set up when
# the matching *_available? check passes (otherwise nil is returned).
# A LoadError raised during setup is logged as a warning instead of propagating.
#
# @param subscriber_type one of :logger, :otel, :newrelic, :langfuse
# @raise [ArgumentError] for any other subscriber type
def self.setup_subscriber(subscriber_type)
  case subscriber_type
  when :logger   then setup_logger_subscriber
  when :otel     then (setup_otel_subscriber if otel_available?)
  when :newrelic then (setup_newrelic_subscriber if newrelic_available?)
  when :langfuse then (setup_langfuse_subscriber if langfuse_available?)
  else
    raise ArgumentError, "Unknown subscriber type: #{subscriber_type}"
  end
rescue LoadError => load_error
  DSPy.logger.warn "Failed to setup #{subscriber_type} subscriber: #{load_error.message}"
end
205
+
206
# Instantiates a logger subscriber with default options.
#
# @return the value returned by logger_subscriber
def self.setup_logger_subscriber
  logger_subscriber
end
210
+
211
# Instantiates an OpenTelemetry subscriber with default options.
#
# @return the value returned by otel_subscriber
def self.setup_otel_subscriber
  otel_subscriber
end
215
+
216
# Instantiates a New Relic subscriber with default options.
#
# @return the value returned by newrelic_subscriber
def self.setup_newrelic_subscriber
  newrelic_subscriber
end
220
+
221
# Instantiates a Langfuse subscriber with default options.
#
# @return the value returned by langfuse_subscriber
def self.setup_langfuse_subscriber
  langfuse_subscriber
end
225
+
226
+ # Dependency checking methods
227
# Reports whether the OpenTelemetry SDK can be loaded.
#
# @return [Boolean] true when `require 'opentelemetry/sdk'` succeeds,
#   false when it raises LoadError
def self.otel_available?
  require 'opentelemetry/sdk'
  true
rescue LoadError
  false
end
235
+
236
# Reports whether the New Relic agent can be loaded.
#
# @return [Boolean] true when `require 'newrelic_rpm'` succeeds,
#   false when it raises LoadError
def self.newrelic_available?
  require 'newrelic_rpm'
  true
rescue LoadError
  false
end
244
+
245
# Reports whether the Langfuse client library can be loaded.
#
# @return [Boolean] true when `require 'langfuse'` succeeds,
#   false when it raises LoadError
def self.langfuse_available?
  require 'langfuse'
  true
rescue LoadError
  false
end
253
+
254
# Builds the timestamp entry merged into event payloads, in the format selected
# by DSPy.config.instrumentation.timestamp_format.
#
# @return [Hash] one of
#   * ISO8601 (and any unrecognized format): { timestamp: <Time#iso8601 string> }
#   * RFC3339_NANO: { timestamp: "YYYY-MM-DDTHH:MM:SS.nnnnnnnnn+HH:MM" }
#   * UNIX_NANO:    { timestamp_ns: <Integer nanoseconds since the Unix epoch> }
def self.generate_timestamp
  now = Time.now

  case DSPy.config.instrumentation.timestamp_format
  when DSPy::TimestampFormat::ISO8601
    { timestamp: now.iso8601 }
  when DSPy::TimestampFormat::RFC3339_NANO
    # RFC 3339 writes the UTC offset with a colon ("+02:00"), i.e. %:z not %z.
    { timestamp: now.strftime('%Y-%m-%dT%H:%M:%S.%9N%:z') }
  when DSPy::TimestampFormat::UNIX_NANO
    # Integer arithmetic keeps full nanosecond resolution; going through
    # Time#to_f would round away the low digits of the nanosecond count.
    { timestamp_ns: (now.to_i * 1_000_000_000) + now.nsec }
  else
    { timestamp: now.iso8601 } # Fallback to iso8601
  end
end
267
+
268
# Legacy setup path kept for backward compatibility.
#
# Always creates a logger subscriber, then creates the optional subscribers
# whose trigger is present:
#   * OpenTelemetry: OTEL_EXPORTER_OTLP_ENDPOINT is set or OpenTelemetry is defined
#   * New Relic:     NewRelic is defined
#   * Langfuse:      LANGFUSE_SECRET_KEY is set or Langfuse is defined
# A LoadError raised while creating an optional subscriber is ignored.
#
# @return the Langfuse subscriber when one was created, otherwise nil
def self.setup_subscribers_legacy
  logger_subscriber

  otel_requested = ENV['OTEL_EXPORTER_OTLP_ENDPOINT'] || defined?(OpenTelemetry)
  begin
    otel_subscriber if otel_requested
  rescue LoadError
    nil # OpenTelemetry not available, skip
  end

  newrelic_requested = defined?(NewRelic)
  begin
    newrelic_subscriber if newrelic_requested
  rescue LoadError
    nil # New Relic not available, skip
  end

  langfuse_requested = ENV['LANGFUSE_SECRET_KEY'] || defined?(Langfuse)
  begin
    langfuse_subscriber if langfuse_requested
  rescue LoadError
    nil # Langfuse not available, skip
  end
end
112
293
  end
113
294
  end
@@ -7,8 +7,7 @@ module DSPy
7
7
  # Maps provider prefixes to adapter classes
8
8
  ADAPTER_MAP = {
9
9
  'openai' => 'OpenAIAdapter',
10
- 'anthropic' => 'AnthropicAdapter',
11
- 'ruby_llm' => 'RubyLLMAdapter'
10
+ 'anthropic' => 'AnthropicAdapter'
12
11
  }.freeze
13
12
 
14
13
  class << self
@@ -27,13 +26,12 @@ module DSPy
27
26
 
28
27
# Splits a "provider/model" identifier at its first slash.
#
# @param model_id identifier such as 'openai/gpt-4'; everything after the
#   first '/' (including further slashes) is the model part
# @return [Array(String, String)] provider and model
# @raise [ArgumentError] when +model_id+ contains no '/'
def parse_model_id(model_id)
  return model_id.split('/', 2) if model_id.include?('/')

  raise ArgumentError, "model_id must include provider (e.g., 'openai/gpt-4', 'anthropic/claude-3'). Legacy format without provider is no longer supported."
end
38
36
 
39
37
  def get_adapter_class(provider)