dspy 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,336 @@
1
+ # frozen_string_literal: true
2
+
3
require 'json'
require 'time'

require 'sorbet-runtime'
require_relative '../instrumentation'
require_relative '../evaluate'
require_relative '../example'
7
+
8
+ module DSPy
9
+ module Teleprompt
10
+ # Base class for all DSPy teleprompters (optimizers)
11
+ # Defines the common interface and provides shared functionality for prompt optimization
12
+ class Teleprompter
13
+ extend T::Sig
14
+
15
# Settings object for an optimization run.
#
# Each setting is a plain read/write attribute. A new instance starts with
# the defaults assigned in #initialize, and #to_h exports the current values.
class Config
  extend T::Sig

  # Setting names, listed in the order #to_h emits them.
  SETTING_NAMES = %i[
    max_bootstrapped_examples
    max_labeled_examples
    num_candidate_examples
    num_threads
    max_errors
    require_validation_examples
    save_intermediate_results
    save_path
  ].freeze
  private_constant :SETTING_NAMES

  # Default: 4
  sig { returns(T.nilable(Integer)) }
  attr_accessor :max_bootstrapped_examples

  # Default: 16
  sig { returns(T.nilable(Integer)) }
  attr_accessor :max_labeled_examples

  # Default: 50
  sig { returns(T.nilable(Integer)) }
  attr_accessor :num_candidate_examples

  # Default: 1. Forwarded to DSPy::Evaluate as `num_threads`.
  sig { returns(T.nilable(Integer)) }
  attr_accessor :num_threads

  # Default: 5. Forwarded to DSPy::Evaluate as `max_errors`.
  sig { returns(T.nilable(Integer)) }
  attr_accessor :max_errors

  # Default: true. When set, Teleprompter#validate_inputs rejects a missing
  # or empty validation set.
  sig { returns(T::Boolean) }
  attr_accessor :require_validation_examples

  # Default: false. Gates everything Teleprompter#save_results does.
  sig { returns(T::Boolean) }
  attr_accessor :save_intermediate_results

  # Default: nil. File path used by Teleprompter#save_results when present.
  sig { returns(T.nilable(String)) }
  attr_accessor :save_path

  # Populate every setting with its default value.
  sig { void }
  def initialize
    @save_path = nil
    @save_intermediate_results = false
    @require_validation_examples = true
    @max_errors = 5
    @num_threads = 1
    @num_candidate_examples = 50
    @max_labeled_examples = 16
    @max_bootstrapped_examples = 4
  end

  # Snapshot of the current settings keyed by setting name.
  sig { returns(T::Hash[Symbol, T.untyped]) }
  def to_h
    # Read the instance variables directly (not the accessors), exactly as a
    # literal hash of @ivars would.
    SETTING_NAMES.to_h { |name| [name, instance_variable_get(:"@#{name}")] }
  end
end
69
+
70
# Immutable record describing the outcome of an optimization run.
#
# Holds the optimized program together with its scores, history, the
# optional best score (name and value) and free-form metadata.
class OptimizationResult
  extend T::Sig

  sig { returns(T.untyped) }
  attr_reader :optimized_program

  sig { returns(T::Hash[Symbol, T.untyped]) }
  attr_reader :scores

  sig { returns(T::Hash[Symbol, T.untyped]) }
  attr_reader :history

  sig { returns(T.nilable(String)) }
  attr_reader :best_score_name

  sig { returns(T.nilable(Float)) }
  attr_reader :best_score_value

  sig { returns(T::Hash[Symbol, T.untyped]) }
  attr_reader :metadata

  # Build a result.
  #
  # optimized_program - the program produced by the optimizer (any object)
  # scores            - Hash of score data
  # history           - Hash of optimization history
  # best_score_name   - optional String naming the best score
  # best_score_value  - optional Float value of the best score
  # metadata          - optional Hash of additional data (defaults to {})
  #
  # The three hashes are stored as frozen shallow copies. Freezing the
  # arguments themselves would make the caller's own objects immutable as a
  # hidden side effect, causing a FrozenError far from this call site if the
  # caller kept updating them.
  sig do
    params(
      optimized_program: T.untyped,
      scores: T::Hash[Symbol, T.untyped],
      history: T::Hash[Symbol, T.untyped],
      best_score_name: T.nilable(String),
      best_score_value: T.nilable(Float),
      metadata: T::Hash[Symbol, T.untyped]
    ).void
  end
  def initialize(optimized_program:, scores:, history:, best_score_name: nil, best_score_value: nil, metadata: {})
    @optimized_program = optimized_program
    @scores = scores.dup.freeze
    @history = history.dup.freeze
    @best_score_name = best_score_name
    @best_score_value = best_score_value
    @metadata = metadata.dup.freeze
  end

  # Hash view of the result. The optimized program itself is not included;
  # only scores, history, best score name/value and metadata are exported.
  sig { returns(T::Hash[Symbol, T.untyped]) }
  def to_h
    {
      scores: @scores,
      history: @history,
      best_score_name: @best_score_name,
      best_score_value: @best_score_value,
      metadata: @metadata
    }
  end
end
122
+
123
# Settings used by this teleprompter (never nil after construction).
sig { returns(Config) }
attr_reader :config

# Optional two-argument callable used to score predictions.
sig { returns(T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T.untyped))) }
attr_reader :metric

# Evaluator built by #create_evaluator; nil until that method is called.
sig { returns(T.nilable(DSPy::Evaluate)) }
attr_reader :evaluator

# Set up a teleprompter.
#
# metric - optional scoring callable taking two arguments
# config - optional Config; a Config with default values is created when omitted
sig do
  params(
    metric: T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T.untyped)),
    config: T.nilable(Config)
  ).void
end
def initialize(metric: nil, config: nil)
  @evaluator = nil
  @config = config || Config.new
  @metric = metric
end
143
+
144
# Entry point for optimization. This base implementation is abstract and
# always raises; concrete teleprompters override it.
#
# program  - the program to optimize
# trainset - Array of training examples
# valset   - optional Array of validation examples
#
# Declared to return an OptimizationResult.
# Raises NotImplementedError unless overridden.
sig do
  params(
    program: T.untyped,
    trainset: T::Array[T.untyped],
    valset: T.nilable(T::Array[T.untyped])
  ).returns(OptimizationResult)
end
def compile(program, trainset:, valset: nil)
  raise(NotImplementedError, "Subclasses must implement the compile method")
end
155
+
156
# Check the arguments of an optimization run before any work is done.
#
# program  - the program to optimize; must not be nil/false
# trainset - Array of training examples; must not be empty
# valset   - optional Array of validation examples; required (non-empty)
#            when config.require_validation_examples is set
#
# Raises ArgumentError when the program is missing, the training set is
# empty, a required validation set is missing or empty, or any individual
# example has an unsupported format.
sig do
  params(
    program: T.untyped,
    trainset: T::Array[T.untyped],
    valset: T.nilable(T::Array[T.untyped])
  ).void
end
def validate_inputs(program, trainset, valset = nil)
  raise ArgumentError, "Program cannot be nil" unless program
  raise ArgumentError, "Training set cannot be empty" if trainset.empty?

  if @config.require_validation_examples && (valset.nil? || valset.empty?)
    raise ArgumentError, "Validation set is required but not provided"
  end

  validate_examples(trainset, "training")
  # No `valset.any?` guard here: Array#any? is false for a non-empty array
  # that holds only nil/false entries, which let such a validation set skip
  # validation entirely. validate_examples already returns early for nil and
  # does nothing for an empty array.
  validate_examples(valset, "validation")
end
176
+
177
# Guarantee that every example is a DSPy::Example.
#
# examples        - Array of candidate examples
# signature_class - accepted but not used by this implementation
#
# Returns the same array, untouched, when every element is a DSPy::Example.
# Raises ArgumentError as soon as any other kind of element is present.
sig { params(examples: T::Array[T.untyped], signature_class: T.nilable(T.class_of(Signature))).returns(T::Array[DSPy::Example]) }
def ensure_typed_examples(examples, signature_class = nil)
  if examples.any? { |candidate| !candidate.is_a?(DSPy::Example) }
    raise ArgumentError, "All examples must be DSPy::Example instances. Legacy format support has been removed. Please convert your examples to use the structured format with :input and :expected keys."
  end

  examples
end
185
+
186
# Build a DSPy::Evaluate instance, remember it in @evaluator and return it.
#
# The metric is the one given at construction time, or otherwise whatever
# default_metric_for_examples yields for the supplied examples. Thread count
# and error limit are taken from the config.
sig { params(examples: T::Array[T.untyped]).returns(DSPy::Evaluate) }
def create_evaluator(examples)
  chosen_metric = @metric || default_metric_for_examples(examples)
  evaluator_options = {
    metric: chosen_metric,
    num_threads: @config.num_threads,
    max_errors: @config.max_errors
  }

  # The program slot is passed as nil here.
  @evaluator = DSPy::Evaluate.new(nil, **evaluator_options)
end
199
+
200
# Score a program against a set of examples.
#
# program  - the program to evaluate
# examples - Array of examples to run it on
# metric   - optional metric for this call; falls back to the metric given
#            at construction time, then to default_metric_for_examples
#
# A fresh DSPy::Evaluate is created for each call (the @evaluator instance
# variable is not touched) and run with progress display turned off.
# Returns whatever DSPy::Evaluate#evaluate returns.
sig do
  params(
    program: T.untyped,
    examples: T::Array[T.untyped],
    metric: T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T.untyped))
  ).returns(DSPy::Evaluate::BatchEvaluationResult)
end
def evaluate_program(program, examples, metric: nil)
  chosen_metric = metric || @metric || default_metric_for_examples(examples)

  DSPy::Evaluate
    .new(program, metric: chosen_metric, num_threads: @config.num_threads, max_errors: @config.max_errors)
    .evaluate(examples, display_progress: false)
end
220
+
221
# Persist an optimization result when config.save_intermediate_results is on.
#
# Does nothing when the flag is off. Otherwise, in this order:
#   1. writes result.to_h as pretty-printed JSON to config.save_path, if a
#      path is configured (legacy file-based saving);
#   2. hands the result to DSPy::Storage::StorageManager;
#   3. registers the result with DSPy::Registry::RegistryManager.
#
# Uses JSON from the standard library, which the file must require.
# NOTE(review): DSPy::Storage and DSPy::Registry are not required by this
# file; presumably they are loaded elsewhere — confirm.
sig { params(result: OptimizationResult).void }
def save_results(result)
  return unless @config.save_intermediate_results

  teleprompter_name = self.class.name

  # Legacy file-based saving
  File.write(@config.save_path, JSON.pretty_generate(result.to_h)) if @config.save_path

  # Modern storage system integration
  DSPy::Storage::StorageManager.instance.save_optimization_result(
    result,
    tags: [teleprompter_name.split('::').last.downcase],
    description: "Optimization by #{teleprompter_name}",
    metadata: {
      teleprompter_class: teleprompter_name,
      config: @config.to_h,
      optimization_duration: result.metadata[:optimization_duration] || 0
    }
  )

  # Registry system integration for version management
  DSPy::Registry::RegistryManager.instance.register_optimization_result(
    result,
    metadata: {
      teleprompter_class: teleprompter_name,
      config: @config.to_h
    }
  )
end
258
+
259
+ protected
260
+
261
# Run validate_single_example over each element of a collection.
#
# examples - Array of examples, or nil (nil is accepted and ignored)
# context  - label such as "training"; combined with the element's
#            zero-based index to identify it in error messages
sig { params(examples: T.nilable(T::Array[T.untyped]), context: String).void }
def validate_examples(examples, context)
  (examples || []).each.with_index do |item, position|
    validate_single_example(item, "#{context} example #{position}")
  end
end
270
+
271
# Check that one example has a supported shape.
#
# Accepted:
#   * a DSPy::Example;
#   * a Hash that has both :input and :expected keys, or both 'input' and
#     'expected' keys (one symbol key plus one string key is not enough);
#   * any other object responding to both #input and #expected.
#
# example - the object to check
# context - description of the example, interpolated into the error message
#
# Raises ArgumentError for anything else.
sig { params(example: T.untyped, context: String).void }
def validate_single_example(example, context)
  acceptable =
    case example
    when DSPy::Example
      true
    when Hash
      [%i[input expected], %w[input expected]].any? do |required_keys|
        required_keys.all? { |key| example.key?(key) }
      end
    else
      example.respond_to?(:input) && example.respond_to?(:expected)
    end
  return if acceptable

  raise ArgumentError, "Invalid #{context}: must be DSPy::Example or structured hash with :input and :expected keys. Legacy flat format is no longer supported."
end
292
+
293
+
294
# Work out the signature class for a set of examples by delegating to
# Utils.infer_signature_class.
#
# examples - Array of examples
#
# Returns whatever Utils.infer_signature_class returns (declared as a
# Signature class or nil).
sig { params(examples: T::Array[T.untyped]).returns(T.nilable(T.class_of(Signature))) }
def infer_signature_class(examples)
  # 'utils' is loaded on first use rather than at file load time.
  # NOTE(review): presumably to avoid a load-order cycle — confirm.
  require_relative 'utils'

  Utils.infer_signature_class(examples)
end
300
+
301
# Pick a fallback metric based on the kind of examples supplied.
#
# Only the first element is inspected. When it is a DSPy::Example the result
# is a proc that calls example.matches_prediction?(prediction); for anything
# else (including an empty array) there is no default and nil is returned.
sig { params(examples: T::Array[T.untyped]).returns(T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T::Boolean))) }
def default_metric_for_examples(examples)
  return nil unless examples.first.is_a?(DSPy::Example)

  proc { |example, prediction| example.matches_prediction?(prediction) }
end
312
+
313
# Run a block through Instrumentation.instrument as an optimization step.
#
# step_name - appended to "dspy.optimization." to form the event name
# payload   - extra event data; the teleprompter class name and the current
#             config (as a Hash) are merged on top of it
# block     - the work to instrument
#
# Returns whatever Instrumentation.instrument returns.
sig { params(step_name: String, payload: T::Hash[Symbol, T.untyped], block: T.proc.returns(T.untyped)).returns(T.untyped) }
def instrument_step(step_name, payload = {}, &block)
  step_context = {
    teleprompter_class: self.class.name,
    config: @config.to_h
  }

  Instrumentation.instrument("dspy.optimization.#{step_name}", payload.merge(step_context), &block)
end
323
+
324
# Publish an optimization event through Instrumentation.emit.
#
# event_name - appended to "dspy.optimization." to form the full event name
# payload    - extra event data; the teleprompter class name and an ISO 8601
#              timestamp of the current time are merged on top of it
#
# Time#iso8601 is provided by the `time` standard library on Ruby versions
# before 3.4, so the file must `require 'time'` for this method to work
# regardless of what other libraries happen to load.
sig { params(event_name: String, payload: T::Hash[Symbol, T.untyped]).void }
def emit_event(event_name, payload = {})
  event_context = {
    teleprompter_class: self.class.name,
    timestamp: Time.now.iso8601
  }

  Instrumentation.emit("dspy.optimization.#{event_name}", payload.merge(event_context))
end
334
+ end
335
+ end
336
+ end