dspy 0.28.1 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -3
  3. data/lib/dspy/callbacks.rb +222 -0
  4. data/lib/dspy/chain_of_thought.rb +2 -1
  5. data/lib/dspy/code_act.rb +14 -1
  6. data/lib/dspy/datasets/ade.rb +90 -0
  7. data/lib/dspy/datasets.rb +8 -0
  8. data/lib/dspy/lm.rb +9 -12
  9. data/lib/dspy/mixins/struct_builder.rb +17 -25
  10. data/lib/dspy/module.rb +45 -1
  11. data/lib/dspy/observability/async_span_processor.rb +67 -93
  12. data/lib/dspy/observability.rb +43 -1
  13. data/lib/dspy/predict.rb +17 -0
  14. data/lib/dspy/prompt.rb +90 -20
  15. data/lib/dspy/propose/dataset_summary_generator.rb +210 -0
  16. data/lib/dspy/propose/grounded_proposer.rb +320 -66
  17. data/lib/dspy/re_act.rb +13 -0
  18. data/lib/dspy/reflection_lm.rb +36 -0
  19. data/lib/dspy/teleprompt/bootstrap_strategy.rb +26 -0
  20. data/lib/dspy/teleprompt/gepa.rb +448 -2803
  21. data/lib/dspy/teleprompt/mipro_v2.rb +624 -100
  22. data/lib/dspy/teleprompt/utils.rb +349 -42
  23. data/lib/dspy/version.rb +2 -2
  24. data/lib/dspy.rb +4 -2
  25. data/lib/gepa/api.rb +61 -0
  26. data/lib/gepa/core/engine.rb +226 -0
  27. data/lib/gepa/core/evaluation_batch.rb +26 -0
  28. data/lib/gepa/core/result.rb +92 -0
  29. data/lib/gepa/core/state.rb +231 -0
  30. data/lib/gepa/logging/experiment_tracker.rb +54 -0
  31. data/lib/gepa/logging/logger.rb +57 -0
  32. data/lib/gepa/logging.rb +9 -0
  33. data/lib/gepa/proposer/base.rb +27 -0
  34. data/lib/gepa/proposer/merge_proposer.rb +424 -0
  35. data/lib/gepa/proposer/reflective_mutation/base.rb +48 -0
  36. data/lib/gepa/proposer/reflective_mutation/reflective_mutation.rb +188 -0
  37. data/lib/gepa/strategies/batch_sampler.rb +91 -0
  38. data/lib/gepa/strategies/candidate_selector.rb +97 -0
  39. data/lib/gepa/strategies/component_selector.rb +57 -0
  40. data/lib/gepa/strategies/instruction_proposal.rb +120 -0
  41. data/lib/gepa/telemetry.rb +122 -0
  42. data/lib/gepa/utils/pareto.rb +119 -0
  43. data/lib/gepa.rb +21 -0
  44. metadata +59 -4
  45. data/lib/dspy/teleprompt/simple_optimizer.rb +0 -497
@@ -1,15 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'async'
4
- require 'async/queue'
5
- require 'async/barrier'
3
+ require 'concurrent-ruby'
4
+ require 'thread'
6
5
  require 'opentelemetry/sdk'
7
6
  require 'opentelemetry/sdk/trace/export'
8
7
 
9
8
  module DSPy
10
9
  class Observability
11
- # AsyncSpanProcessor provides truly non-blocking span export using Async gem.
12
- # Spans are queued and exported using async tasks with fiber-based concurrency.
10
+ # AsyncSpanProcessor provides non-blocking span export using concurrent-ruby.
11
+ # Spans are queued and exported on a dedicated single-thread executor to avoid blocking clients.
13
12
  # Implements the same interface as OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor
14
13
  class AsyncSpanProcessor
15
14
  # Default configuration values
@@ -33,12 +32,12 @@ module DSPy
33
32
  @export_batch_size = export_batch_size
34
33
  @shutdown_timeout = shutdown_timeout
35
34
  @max_retries = max_retries
35
+ @export_executor = Concurrent::SingleThreadExecutor.new
36
36
 
37
37
  # Use thread-safe queue for cross-fiber communication
38
38
  @queue = Thread::Queue.new
39
- @barrier = Async::Barrier.new
40
39
  @shutdown_requested = false
41
- @export_task = nil
40
+ @timer_thread = nil
42
41
 
43
42
  start_export_task
44
43
  end
@@ -85,22 +84,35 @@ module DSPy
85
84
 
86
85
  begin
87
86
  # Export any remaining spans
88
- export_remaining_spans
87
+ result = export_remaining_spans(timeout: timeout, export_all: true)
89
88
 
90
- # Shutdown exporter
91
- @exporter.shutdown(timeout: timeout)
89
+ future = Concurrent::Promises.future_on(@export_executor) do
90
+ @exporter.shutdown(timeout: timeout)
91
+ end
92
+ future.value!(timeout)
92
93
 
93
- OpenTelemetry::SDK::Trace::Export::SUCCESS
94
+ result
94
95
  rescue => e
95
96
  DSPy.log('observability.shutdown_error', error: e.message, class: e.class.name)
96
97
  OpenTelemetry::SDK::Trace::Export::FAILURE
98
+ ensure
99
+ begin
100
+ @timer_thread&.join(timeout)
101
+ @timer_thread&.kill if @timer_thread&.alive?
102
+ rescue StandardError
103
+ # ignore timer shutdown issues
104
+ end
105
+ @export_executor.shutdown
106
+ unless @export_executor.wait_for_termination(timeout)
107
+ @export_executor.kill
108
+ end
97
109
  end
98
110
  end
99
111
 
100
112
  def force_flush(timeout: nil)
101
113
  return OpenTelemetry::SDK::Trace::Export::SUCCESS if @queue.empty?
102
114
 
103
- export_remaining_spans
115
+ export_remaining_spans(timeout: timeout, export_all: true)
104
116
  end
105
117
 
106
118
  private
@@ -109,19 +121,15 @@ module DSPy
109
121
  return if @export_interval <= 0 # Disable timer for testing
110
122
  return if ENV['DSPY_DISABLE_OBSERVABILITY'] == 'true' # Skip in tests
111
123
 
112
- # Start timer-based export task in background
113
- Thread.new do
124
+ @timer_thread = Thread.new do
114
125
  loop do
115
126
  break if @shutdown_requested
116
127
 
117
128
  sleep(@export_interval)
129
+ break if @shutdown_requested
130
+ next if @queue.empty?
118
131
 
119
- # Export queued spans in sync block
120
- unless @queue.empty?
121
- Sync do
122
- export_queued_spans
123
- end
124
- end
132
+ schedule_async_export(export_all: true)
125
133
  end
126
134
  rescue => e
127
135
  DSPy.log('observability.export_task_error', error: e.message, class: e.class.name)
@@ -131,39 +139,56 @@ module DSPy
131
139
  def trigger_export_if_batch_full
132
140
  return if @queue.size < @export_batch_size
133
141
  return if ENV['DSPY_DISABLE_OBSERVABILITY'] == 'true' # Skip in tests
142
+ schedule_async_export(export_all: false)
143
+ end
134
144
 
135
- # Trigger immediate export in background
136
- Thread.new do
137
- Sync do
138
- export_queued_spans
139
- end
140
- rescue => e
141
- DSPy.log('observability.batch_export_error', error: e.message)
145
+ def export_remaining_spans(timeout: nil, export_all: true)
146
+ return OpenTelemetry::SDK::Trace::Export::SUCCESS if @queue.empty?
147
+
148
+ future = Concurrent::Promises.future_on(@export_executor) do
149
+ export_queued_spans_internal(export_all: export_all)
142
150
  end
151
+
152
+ future.value!(timeout || @shutdown_timeout)
153
+ rescue => e
154
+ DSPy.log('observability.export_error', error: e.message, class: e.class.name)
155
+ OpenTelemetry::SDK::Trace::Export::FAILURE
143
156
  end
144
157
 
145
- def export_remaining_spans
146
- spans = []
158
+ def schedule_async_export(export_all: false)
159
+ return if @shutdown_requested
147
160
 
148
- # Drain entire queue
149
- until @queue.empty?
150
- begin
151
- spans << @queue.pop(true) # non-blocking pop
152
- rescue ThreadError
153
- break
154
- end
161
+ @export_executor.post do
162
+ export_queued_spans_internal(export_all: export_all)
163
+ rescue => e
164
+ DSPy.log('observability.batch_export_error', error: e.message, class: e.class.name)
155
165
  end
166
+ end
167
+
168
+ def export_queued_spans
169
+ export_queued_spans_internal(export_all: false)
170
+ end
171
+
172
+ def export_queued_spans_internal(export_all: false)
173
+ result = OpenTelemetry::SDK::Trace::Export::SUCCESS
174
+
175
+ loop do
176
+ spans = dequeue_spans(export_all ? @queue_size : @export_batch_size)
177
+ break if spans.empty?
156
178
 
157
- return OpenTelemetry::SDK::Trace::Export::SUCCESS if spans.empty?
179
+ result = export_spans_with_retry(spans)
180
+ break if result == OpenTelemetry::SDK::Trace::Export::FAILURE
158
181
 
159
- export_spans_with_retry(spans)
182
+ break unless export_all || @queue.size >= @export_batch_size
183
+ end
184
+
185
+ result
160
186
  end
161
187
 
162
- def export_queued_spans
188
+ def dequeue_spans(limit)
163
189
  spans = []
164
190
 
165
- # Collect up to batch size
166
- @export_batch_size.times do
191
+ limit.times do
167
192
  begin
168
193
  spans << @queue.pop(true) # non-blocking pop
169
194
  rescue ThreadError
@@ -171,12 +196,7 @@ module DSPy
171
196
  end
172
197
  end
173
198
 
174
- return if spans.empty?
175
-
176
- # Export using async I/O
177
- Sync do
178
- export_spans_with_retry_async(spans)
179
- end
199
+ spans
180
200
  end
181
201
 
182
202
  def export_spans_with_retry(spans)
@@ -225,52 +245,6 @@ module DSPy
225
245
  OpenTelemetry::SDK::Trace::Export::FAILURE
226
246
  end
227
247
 
228
- def export_spans_with_retry_async(spans)
229
- retries = 0
230
-
231
- # Convert spans to SpanData objects (required by OTLP exporter)
232
- span_data_batch = spans.map(&:to_span_data)
233
-
234
- # Log export attempt
235
- DSPy.log('observability.export_attempt',
236
- spans_count: span_data_batch.size,
237
- batch_size: span_data_batch.size)
238
-
239
- loop do
240
- # Use current async task for potentially non-blocking export
241
- result = @exporter.export(span_data_batch, timeout: @shutdown_timeout)
242
-
243
- case result
244
- when OpenTelemetry::SDK::Trace::Export::SUCCESS
245
- DSPy.log('observability.export_success',
246
- spans_count: span_data_batch.size,
247
- export_result: 'SUCCESS')
248
- return result
249
- when OpenTelemetry::SDK::Trace::Export::FAILURE
250
- retries += 1
251
- if retries <= @max_retries
252
- backoff_seconds = 0.1 * (2 ** retries)
253
- DSPy.log('observability.export_retry',
254
- attempt: retries,
255
- spans_count: span_data_batch.size,
256
- backoff_seconds: backoff_seconds)
257
- # Async sleep for exponential backoff
258
- Async::Task.current.sleep(backoff_seconds)
259
- next
260
- else
261
- DSPy.log('observability.export_failed',
262
- spans_count: span_data_batch.size,
263
- retries: retries)
264
- return result
265
- end
266
- else
267
- return result
268
- end
269
- end
270
- rescue => e
271
- DSPy.log('observability.export_error', error: e.message, class: e.class.name)
272
- OpenTelemetry::SDK::Trace::Export::FAILURE
273
- end
274
248
  end
275
249
  end
276
250
  end
@@ -41,6 +41,8 @@ module DSPy
41
41
  require 'opentelemetry/sdk'
42
42
  require 'opentelemetry/exporter/otlp'
43
43
 
44
+ patch_frozen_ssl_context_for_otlp!
45
+
44
46
  # Generate Basic Auth header
45
47
  auth_string = Base64.strict_encode64("#{public_key}:#{secret_key}")
46
48
 
@@ -150,6 +152,46 @@ module DSPy
150
152
  @tracer = nil
151
153
  @endpoint = nil
152
154
  end
155
+
156
+ private
157
+
158
+ def patch_frozen_ssl_context_for_otlp!
159
+ return unless defined?(OpenTelemetry::Exporter::OTLP::Exporter)
160
+
161
+ ssl_context_frozen = begin
162
+ http = Net::HTTP.new('example.com', 443)
163
+ http.use_ssl = true
164
+ http.ssl_context&.frozen?
165
+ rescue StandardError
166
+ false
167
+ end
168
+
169
+ return unless ssl_context_frozen
170
+
171
+ exporter = OpenTelemetry::Exporter::OTLP::Exporter
172
+ return if exporter.instance_variable_defined?(:@_dspy_ssl_patch_applied)
173
+
174
+ exporter.class_eval do
175
+ define_method(:http_connection) do |uri, ssl_verify_mode, certificate_file, client_certificate_file, client_key_file|
176
+ http = Net::HTTP.new(uri.host, uri.port)
177
+ use_ssl = uri.scheme == 'https'
178
+ http.use_ssl = use_ssl
179
+
180
+ if use_ssl && http.ssl_context&.frozen?
181
+ http.instance_variable_set(:@ssl_context, OpenSSL::SSL::SSLContext.new)
182
+ end
183
+
184
+ http.verify_mode = ssl_verify_mode
185
+ http.ca_file = certificate_file unless certificate_file.nil?
186
+ http.cert = OpenSSL::X509::Certificate.new(File.read(client_certificate_file)) unless client_certificate_file.nil?
187
+ http.key = OpenSSL::PKey::RSA.new(File.read(client_key_file)) unless client_key_file.nil?
188
+ http.keep_alive_timeout = KEEP_ALIVE_TIMEOUT
189
+ http
190
+ end
191
+ end
192
+
193
+ exporter.instance_variable_set(:@_dspy_ssl_patch_applied, true)
194
+ end
153
195
  end
154
196
  end
155
- end
197
+ end
data/lib/dspy/predict.rb CHANGED
@@ -53,11 +53,18 @@ module DSPy
53
53
  sig { returns(Prompt) }
54
54
  attr_reader :prompt
55
55
 
56
+ # Mutable demos attribute for MIPROv2 compatibility
57
+ sig { returns(T.nilable(T::Array[FewShotExample])) }
58
+ attr_accessor :demos
59
+
56
60
  sig { params(signature_class: T.class_of(Signature)).void }
57
61
  def initialize(signature_class)
58
62
  super()
59
63
  @signature_class = signature_class
64
+
65
+ # Prompt will read schema_format from config automatically
60
66
  @prompt = Prompt.from_signature(signature_class)
67
+ @demos = nil
61
68
  end
62
69
 
63
70
  # Reconstruct program from serialized hash
@@ -131,6 +138,16 @@ module DSPy
131
138
  with_prompt(@prompt.add_examples(examples))
132
139
  end
133
140
 
141
+ sig { override.returns(T::Array[[String, DSPy::Module]]) }
142
+ def named_predictors
143
+ [["self", self]]
144
+ end
145
+
146
+ sig { override.returns(T::Array[DSPy::Module]) }
147
+ def predictors
148
+ [self]
149
+ end
150
+
134
151
  # Remove forward override to let Module#forward handle span creation
135
152
 
136
153
  sig { params(input_values: T.untyped).returns(T.untyped) }
data/lib/dspy/prompt.rb CHANGED
@@ -22,21 +22,39 @@ module DSPy
22
22
  sig { returns(T.nilable(String)) }
23
23
  attr_reader :signature_class_name
24
24
 
25
+ # Returns the effective schema format
26
+ # Precedence: instance variable (if not :json default) > config.lm > :json
27
+ sig { returns(Symbol) }
28
+ def schema_format
29
+ # If @schema_format was explicitly set to something other than :json, respect it
30
+ return @schema_format if @schema_format && @schema_format != :json
31
+
32
+ # Otherwise, read from config if available
33
+ DSPy.config.lm&.schema_format || @schema_format || :json
34
+ end
35
+
36
+ sig { returns(T.nilable(T.class_of(Signature))) }
37
+ attr_reader :signature_class
38
+
25
39
  sig do
26
40
  params(
27
41
  instruction: String,
28
42
  input_schema: T::Hash[Symbol, T.untyped],
29
43
  output_schema: T::Hash[Symbol, T.untyped],
30
44
  few_shot_examples: T::Array[FewShotExample],
31
- signature_class_name: T.nilable(String)
45
+ signature_class_name: T.nilable(String),
46
+ schema_format: Symbol,
47
+ signature_class: T.nilable(T.class_of(Signature))
32
48
  ).void
33
49
  end
34
- def initialize(instruction:, input_schema:, output_schema:, few_shot_examples: [], signature_class_name: nil)
50
+ def initialize(instruction:, input_schema:, output_schema:, few_shot_examples: [], signature_class_name: nil, schema_format: :json, signature_class: nil)
35
51
  @instruction = instruction
36
52
  @few_shot_examples = few_shot_examples.freeze
37
53
  @input_schema = input_schema.freeze
38
54
  @output_schema = output_schema.freeze
39
55
  @signature_class_name = signature_class_name
56
+ @schema_format = schema_format
57
+ @signature_class = signature_class
40
58
  end
41
59
 
42
60
  # Immutable update methods for optimization
@@ -47,7 +65,9 @@ module DSPy
47
65
  input_schema: @input_schema,
48
66
  output_schema: @output_schema,
49
67
  few_shot_examples: @few_shot_examples,
50
- signature_class_name: @signature_class_name
68
+ signature_class_name: @signature_class_name,
69
+ schema_format: @schema_format,
70
+ signature_class: @signature_class
51
71
  )
52
72
  end
53
73
 
@@ -58,7 +78,9 @@ module DSPy
58
78
  input_schema: @input_schema,
59
79
  output_schema: @output_schema,
60
80
  few_shot_examples: new_examples,
61
- signature_class_name: @signature_class_name
81
+ signature_class_name: @signature_class_name,
82
+ schema_format: @schema_format,
83
+ signature_class: @signature_class
62
84
  )
63
85
  end
64
86
 
@@ -72,16 +94,29 @@ module DSPy
72
94
  sig { returns(String) }
73
95
  def render_system_prompt
74
96
  sections = []
75
-
76
- sections << "Your input schema fields are:"
77
- sections << "```json"
78
- sections << JSON.pretty_generate(@input_schema)
79
- sections << "```"
80
-
81
- sections << "Your output schema fields are:"
82
- sections << "```json"
83
- sections << JSON.pretty_generate(@output_schema)
84
- sections << "```"
97
+
98
+ case schema_format
99
+ when :baml
100
+ sections << "Your input schema fields are:"
101
+ sections << "```baml"
102
+ sections << render_baml_schema(@input_schema, :input)
103
+ sections << "```"
104
+
105
+ sections << "Your output schema fields are:"
106
+ sections << "```baml"
107
+ sections << render_baml_schema(@output_schema, :output)
108
+ sections << "```"
109
+ else # :json (default)
110
+ sections << "Your input schema fields are:"
111
+ sections << "```json"
112
+ sections << JSON.pretty_generate(@input_schema)
113
+ sections << "```"
114
+
115
+ sections << "Your output schema fields are:"
116
+ sections << "```json"
117
+ sections << JSON.pretty_generate(@output_schema)
118
+ sections << "```"
119
+ end
85
120
 
86
121
  sections << ""
87
122
  sections << "All interactions will be structured in the following way, with the appropriate values filled in."
@@ -148,32 +183,36 @@ module DSPy
148
183
  few_shot_examples: @few_shot_examples.map(&:to_h),
149
184
  input_schema: @input_schema,
150
185
  output_schema: @output_schema,
151
- signature_class_name: @signature_class_name
186
+ signature_class_name: @signature_class_name,
187
+ schema_format: @schema_format
152
188
  }
153
189
  end
154
190
 
155
191
  sig { params(hash: T::Hash[Symbol, T.untyped]).returns(Prompt) }
156
192
  def self.from_h(hash)
157
193
  examples = (hash[:few_shot_examples] || []).map { |ex| FewShotExample.from_h(ex) }
158
-
194
+
159
195
  new(
160
196
  instruction: hash[:instruction] || "",
161
197
  input_schema: hash[:input_schema] || {},
162
198
  output_schema: hash[:output_schema] || {},
163
199
  few_shot_examples: examples,
164
- signature_class_name: hash[:signature_class_name]
200
+ signature_class_name: hash[:signature_class_name],
201
+ schema_format: hash[:schema_format] || :json
165
202
  )
166
203
  end
167
204
 
168
205
  # Create prompt from signature class
169
- sig { params(signature_class: T.class_of(Signature)).returns(Prompt) }
170
- def self.from_signature(signature_class)
206
+ sig { params(signature_class: T.class_of(Signature), schema_format: Symbol).returns(Prompt) }
207
+ def self.from_signature(signature_class, schema_format: :json)
171
208
  new(
172
209
  instruction: signature_class.description || "Complete this task.",
173
210
  input_schema: signature_class.input_json_schema,
174
211
  output_schema: signature_class.output_json_schema,
175
212
  few_shot_examples: [],
176
- signature_class_name: signature_class.name
213
+ signature_class_name: signature_class.name,
214
+ schema_format: schema_format,
215
+ signature_class: signature_class
177
216
  )
178
217
  end
179
218
 
@@ -221,6 +260,37 @@ module DSPy
221
260
 
222
261
  private
223
262
 
263
+ # Render BAML schema for input or output
264
+ sig { params(schema: T::Hash[Symbol, T.untyped], type: Symbol).returns(String) }
265
+ def render_baml_schema(schema, type)
266
+ # If we have a signature_class, use sorbet-baml's to_baml method with custom name
267
+ if @signature_class
268
+ begin
269
+ require 'sorbet_baml'
270
+
271
+ struct_class = type == :input ? @signature_class.input_struct_class : @signature_class.output_struct_class
272
+ if struct_class
273
+ # Generate a proper class name from signature class name
274
+ base_name = @signature_class_name || @signature_class.name || "Schema"
275
+ class_name = type == :input ? "#{base_name}Input" : "#{base_name}Output"
276
+
277
+ # Get raw BAML and replace the ugly class name
278
+ raw_baml = struct_class.to_baml
279
+ # Replace the class definition line with a proper name
280
+ return raw_baml.sub(/^class #<Class:0x[0-9a-f]+>/, "class #{class_name}")
281
+ end
282
+ rescue LoadError
283
+ # Fall back to manual BAML generation if sorbet_baml is not available
284
+ end
285
+ end
286
+
287
+ # Fallback: generate BAML manually from schema
288
+ # This is a simple implementation that handles basic types
289
+ # For production use, sorbet-baml should be available
290
+ "# BAML schema generation requires sorbet-baml gem\n" \
291
+ "# Please install: gem install sorbet-baml"
292
+ end
293
+
224
294
  # Recursively serialize complex objects for JSON representation
225
295
  sig { params(obj: T.untyped).returns(T.untyped) }
226
296
  def serialize_for_json(obj)