dspy 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,497 @@
1
+ # frozen_string_literal: true
2
+
3
require 'sorbet-runtime'
require 'time'
require_relative 'teleprompter'
require_relative 'utils'
require_relative '../propose/grounded_proposer'
7
+
8
+ module DSPy
9
+ module Teleprompt
10
+ # Simple optimization algorithm using random/grid search
11
+ # Uses grounded proposer for instruction generation and bootstrap for examples
12
+ class SimpleOptimizer < Teleprompter
13
+ extend T::Sig
14
+
15
+ # Configuration specific to simple optimization
16
# Configuration specific to simple optimization.
#
# Extends the base Config with a trial budget, a search strategy, switches
# for the two optimization axes, and the configuration handed to the
# grounded proposer.
class OptimizerConfig < Config
  extend T::Sig

  # Upper bound on the number of trial configurations that are evaluated.
  sig { returns(Integer) }
  attr_accessor :num_trials

  # "random" shuffles the generated trial configurations; any other value
  # (e.g. "grid") keeps them in generation order.
  sig { returns(String) }
  attr_accessor :search_strategy

  # When true, a GroundedProposer is built and asked for instruction candidates.
  sig { returns(T::Boolean) }
  attr_accessor :use_instruction_optimization

  # When true, few-shot example sets are bootstrapped from the training set.
  sig { returns(T::Boolean) }
  attr_accessor :use_few_shot_optimization

  # Configuration passed to DSPy::Propose::GroundedProposer.
  sig { returns(DSPy::Propose::GroundedProposer::Config) }
  attr_accessor :proposer_config

  # Sets the defaults: 10 trials, random search, both optimization axes on.
  sig { void }
  def initialize
    super
    @num_trials = 10
    @search_strategy = "random" # or "grid"
    @use_instruction_optimization = true
    @use_few_shot_optimization = true
    @proposer_config = DSPy::Propose::GroundedProposer::Config.new
  end
end
44
+
45
+ # Result of a single optimization trial
46
# Result of a single optimization trial: the program variant that was
# evaluated, the configuration that produced it, and its evaluation outcome.
class TrialResult
  extend T::Sig

  # 1-based position of the trial within the run.
  sig { returns(Integer) }
  attr_reader :trial_number

  # The program variant that was evaluated.
  sig { returns(T.untyped) }
  attr_reader :program

  # Instruction used for the trial.
  sig { returns(String) }
  attr_reader :instruction

  # Few-shot examples used for the trial.
  sig { returns(T::Array[T.untyped]) }
  attr_reader :few_shot_examples

  sig { returns(DSPy::Evaluate::BatchEvaluationResult) }
  attr_reader :evaluation_result

  sig { returns(Float) }
  attr_reader :score

  # Frozen hash of per-trial bookkeeping values.
  sig { returns(T::Hash[Symbol, T.untyped]) }
  attr_reader :metadata

  sig do
    params(
      trial_number: Integer,
      program: T.untyped,
      instruction: String,
      few_shot_examples: T::Array[T.untyped],
      evaluation_result: DSPy::Evaluate::BatchEvaluationResult,
      score: Float,
      metadata: T::Hash[Symbol, T.untyped]
    ).void
  end
  def initialize(trial_number:, program:, instruction:, few_shot_examples:, evaluation_result:, score:, metadata:)
    @trial_number = trial_number
    @program = program
    @instruction = instruction
    @few_shot_examples = few_shot_examples
    @evaluation_result = evaluation_result
    @score = score
    # Freeze a shallow copy: freezing the argument itself would make the
    # caller's hash immutable as a side effect of building a result.
    @metadata = metadata.dup.freeze
  end

  # A trial counts as successful when its score is strictly positive.
  sig { returns(T::Boolean) }
  def successful?
    @score > 0.0
  end
end
96
+
97
+ sig { returns(OptimizerConfig) }
98
+ attr_reader :optimizer_config
99
+
100
+ sig { returns(T.nilable(DSPy::Propose::GroundedProposer)) }
101
+ attr_reader :proposer
102
+
103
+ sig do
104
+ params(
105
+ metric: T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T.untyped)),
106
+ config: T.nilable(OptimizerConfig)
107
+ ).void
108
+ end
109
# Builds the optimizer. Falls back to a default OptimizerConfig when no
# config is given, and passes the metric and the resolved config to the
# parent constructor.
def initialize(metric: nil, config: nil)
  @optimizer_config = config || OptimizerConfig.new
  super(metric: metric, config: @optimizer_config)

  # A proposer is only built when instruction optimization is enabled; an
  # `if` without `else` evaluates to nil, so @proposer is nil otherwise.
  @proposer = if @optimizer_config.use_instruction_optimization
    DSPy::Propose::GroundedProposer.new(config: @optimizer_config.proposer_config)
  end
end
119
+
120
+ # Main optimization method
121
+ sig do
122
+ params(
123
+ program: T.untyped,
124
+ trainset: T::Array[T.untyped],
125
+ valset: T.nilable(T::Array[T.untyped])
126
+ ).returns(OptimizationResult)
127
+ end
128
# Main optimization entry point.
#
# Validates the inputs, then (inside an instrumented 'compile' step)
# converts the examples to typed form, optionally bootstraps few-shot
# examples and instruction candidates, runs the trials, picks the best one,
# saves the resulting optimization result and returns it.
def compile(program, trainset:, valset: nil)
  validate_inputs(program, trainset, valset)

  step_payload = {
    trainset_size: trainset.size,
    valset_size: valset&.size || 0,
    num_trials: @optimizer_config.num_trials,
    search_strategy: @optimizer_config.search_strategy
  }

  instrument_step('compile', step_payload) do
    # Convert examples to typed format
    typed_trainset = ensure_typed_examples(trainset)
    typed_valset = valset ? ensure_typed_examples(valset) : nil

    # Evaluate on the validation set when given; otherwise on the first
    # 10 training examples.
    evaluation_set = typed_valset || typed_trainset.take(10)

    # Bootstrap few-shot examples if enabled
    bootstrap_result = nil
    if @optimizer_config.use_few_shot_optimization
      bootstrap_result = bootstrap_examples(program, typed_trainset)
    end

    # Generate instruction candidates if enabled
    instruction_candidates = []
    if @optimizer_config.use_instruction_optimization && @proposer
      instruction_candidates = generate_instruction_candidates(program, typed_trainset, bootstrap_result)
    end

    trials = run_optimization_trials(
      program,
      evaluation_set,
      instruction_candidates,
      bootstrap_result
    )

    best_trial = find_best_trial(trials)
    optimization_result = build_optimization_result(best_trial, trials)

    save_results(optimization_result)
    optimization_result
  end
end
174
+
175
+ private
176
+
177
+ # Bootstrap few-shot examples from training set
178
+ sig { params(program: T.untyped, trainset: T::Array[DSPy::Example]).returns(Utils::BootstrapResult) }
179
# Bootstraps few-shot demo sets from the training set.
#
# Copies the bootstrap-related limits from the optimizer config into a
# fresh Utils::BootstrapConfig and delegates to
# Utils.create_n_fewshot_demo_sets with the optimizer's metric.
def bootstrap_examples(program, trainset)
  settings = @optimizer_config

  bootstrap_config = Utils::BootstrapConfig.new.tap do |cfg|
    cfg.max_bootstrapped_examples = settings.max_bootstrapped_examples
    cfg.max_labeled_examples = settings.max_labeled_examples
    # Half the trial budget, but never fewer than 5 candidate sets.
    cfg.num_candidate_sets = [settings.num_trials / 2, 5].max
    cfg.max_errors = settings.max_errors
    cfg.num_threads = settings.num_threads
  end

  Utils.create_n_fewshot_demo_sets(program, trainset, config: bootstrap_config, metric: @metric)
end
189
+
190
+ # Generate instruction candidates using the proposer
191
+ sig do
192
+ params(
193
+ program: T.untyped,
194
+ trainset: T::Array[DSPy::Example],
195
+ bootstrap_result: T.nilable(Utils::BootstrapResult)
196
+ ).returns(T::Array[String])
197
+ end
198
# Asks the proposer for candidate instructions.
#
# Returns an empty array when there is no proposer or when no signature
# class can be extracted from the program. Otherwise passes the signature
# class, the training set, up to 5 successful bootstrap examples (nil when
# there is no bootstrap result) and the program's current instruction to
# the proposer, and returns its candidate instructions.
def generate_instruction_candidates(program, trainset, bootstrap_result)
  return [] unless @proposer

  # Without a signature class there is nothing to propose for, so check it
  # before gathering the remaining inputs.
  signature_class = extract_signature_class(program)
  return [] unless signature_class

  proposal_result = @proposer.propose_instructions(
    signature_class,
    trainset,
    few_shot_examples: bootstrap_result&.successful_examples&.take(5),
    current_instruction: extract_current_instruction(program)
  )

  proposal_result.candidate_instructions
end
220
+
221
+ # Run optimization trials with different configurations
222
+ sig do
223
+ params(
224
+ program: T.untyped,
225
+ evaluation_set: T::Array[DSPy::Example],
226
+ instruction_candidates: T::Array[String],
227
+ bootstrap_result: T.nilable(Utils::BootstrapResult)
228
+ ).returns(T::Array[TrialResult])
229
+ end
230
# Runs up to num_trials trial configurations and collects their results.
#
# Emits 'trial_start' before each trial and 'trial_complete' after a
# successful one. A trial that raises is reported through an 'error' event
# and the logger, then skipped, so one failing configuration does not abort
# the run. Returns only the trials that completed.
def run_optimization_trials(program, evaluation_set, instruction_candidates, bootstrap_result)
  trial_configs = generate_trial_configurations(instruction_candidates, bootstrap_result)
  trials = []

  # Trial numbers are 1-based.
  trial_configs.take(@optimizer_config.num_trials).each.with_index(1) do |config, trial_number|
    emit_event('trial_start', {
      trial_number: trial_number,
      instruction: config[:instruction],
      num_few_shot: config[:few_shot_examples]&.size || 0
    })

    begin
      trial_result = run_single_trial(program, evaluation_set, config, trial_number)
      trials << trial_result

      emit_event('trial_complete', {
        trial_number: trial_number,
        score: trial_result.score,
        successful: trial_result.successful?,
        duration_ms: trial_result.metadata[:duration_ms] || 0
      })
    rescue => error
      emit_event('error', {
        trial_number: trial_number,
        error_type: error.class.name,
        error_message: error.message
      })

      DSPy.logger.error("Trial #{trial_number} failed: #{error.message}")
    end
  end

  trials
end
268
+
269
+ # Generate configurations for trials
270
+ sig do
271
+ params(
272
+ instruction_candidates: T::Array[String],
273
+ bootstrap_result: T.nilable(Utils::BootstrapResult)
274
+ ).returns(T::Array[T::Hash[Symbol, T.untyped]])
275
+ end
276
# Builds the list of trial configurations.
#
# Each configuration is a hash with :instruction (String or nil) and
# :few_shot_examples (Array). In generation order the list contains:
#   1. the baseline (no instruction, no examples),
#   2. one instruction-only trial per candidate instruction,
#   3. one few-shot-only trial per bootstrap candidate set,
#   4. combined trials pairing the first 3 instructions with the first
#      2 candidate sets.
# The list is shuffled when the search strategy is "random".
def generate_trial_configurations(instruction_candidates, bootstrap_result)
  candidate_sets = bootstrap_result&.candidate_sets || []

  configs = [{ instruction: nil, few_shot_examples: [] }]

  instruction_candidates.each do |instruction|
    configs << { instruction: instruction, few_shot_examples: [] }
  end

  candidate_sets.each do |candidate_set|
    configs << { instruction: nil, few_shot_examples: candidate_set }
  end

  # product yields nothing when either side is empty, so no guard is needed.
  instruction_candidates.take(3).product(candidate_sets.take(2)).each do |instruction, candidate_set|
    configs << { instruction: instruction, few_shot_examples: candidate_set }
  end

  @optimizer_config.search_strategy == "random" ? configs.shuffle : configs
end
310
+
311
+ # Run a single optimization trial
312
+ sig do
313
+ params(
314
+ program: T.untyped,
315
+ evaluation_set: T::Array[DSPy::Example],
316
+ config: T::Hash[Symbol, T.untyped],
317
+ trial_number: Integer
318
+ ).returns(TrialResult)
319
+ end
320
# Runs one trial: applies the configuration to the program, evaluates the
# resulting program on the evaluation set, and wraps the outcome in a
# TrialResult. The score is the evaluation's pass_rate; the metadata records
# the elapsed time in milliseconds and the size of the configuration.
def run_single_trial(program, evaluation_set, config, trial_number)
  instruction = config[:instruction]
  few_shot_examples = config[:few_shot_examples]

  # Monotonic clock: unaffected by wall-clock adjustments during the trial.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  modified_program = apply_trial_configuration(program, config)
  evaluation_result = evaluate_program(modified_program, evaluation_set)

  # Calculate score (using pass_rate as primary metric)
  score = evaluation_result.pass_rate

  elapsed_seconds = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

  TrialResult.new(
    trial_number: trial_number,
    program: modified_program,
    instruction: instruction || "",
    few_shot_examples: few_shot_examples || [],
    evaluation_result: evaluation_result,
    score: score,
    metadata: {
      duration_ms: (elapsed_seconds * 1000).round(2),
      num_examples_evaluated: evaluation_result.total_examples,
      instruction_length: instruction&.length || 0,
      num_few_shot_examples: few_shot_examples&.size || 0
    }
  )
end
352
+
353
+ # Apply trial configuration to program
354
+ sig { params(program: T.untyped, config: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
355
# Applies a trial configuration to a program and returns the resulting
# program. The instruction is applied first (when present), then the
# few-shot examples (when non-empty).
#
# Each capability check is made against the program as modified so far,
# since that is the object the modification is actually applied to.
def apply_trial_configuration(program, config)
  modified_program = program

  instruction = config[:instruction]
  if instruction && respond_to_instruction_modification?(modified_program)
    modified_program = apply_instruction_modification(modified_program, instruction)
  end

  few_shot_examples = config[:few_shot_examples]
  if few_shot_examples&.any? && respond_to_few_shot_modification?(modified_program)
    modified_program = apply_few_shot_modification(modified_program, few_shot_examples)
  end

  modified_program
end
370
+
371
+ # Apply instruction modification to program
372
+ sig { params(program: T.untyped, instruction: String).returns(T.untyped) }
373
# Returns program.with_instruction(instruction) when the program supports
# it; otherwise returns the program unchanged.
def apply_instruction_modification(program, instruction)
  return program unless program.respond_to?(:with_instruction)

  program.with_instruction(instruction)
end
380
+
381
+ # Apply few-shot examples to program
382
+ sig { params(program: T.untyped, examples: T::Array[T.untyped]).returns(T.untyped) }
383
# Attaches few-shot examples to a program.
#
# When the program responds to with_examples, each example is converted to
# a DSPy::FewShotExample (input from input_values, output from
# expected_values, reasoning from extract_reasoning_from_example) and the
# result of with_examples is returned. Otherwise the program is returned
# unchanged.
def apply_few_shot_modification(program, examples)
  return program unless program.respond_to?(:with_examples)

  few_shot_examples = examples.map do |example|
    DSPy::FewShotExample.new(
      input: example.input_values,
      output: example.expected_values,
      reasoning: extract_reasoning_from_example(example)
    )
  end

  program.with_examples(few_shot_examples)
end
398
+
399
+ # Find the best trial based on score
400
+ sig { params(trials: T::Array[TrialResult]).returns(T.nilable(TrialResult)) }
401
# Returns the trial with the highest score, or nil when there are no trials
# (max_by already returns nil for an empty collection).
def find_best_trial(trials)
  trials.max_by(&:score)
end
406
+
407
+ # Build the final optimization result
408
+ sig { params(best_trial: T.nilable(TrialResult), all_trials: T::Array[TrialResult]).returns(OptimizationResult) }
409
# Builds the final OptimizationResult.
#
# With a best trial, the result carries that trial's program and score plus
# a history of all trial scores. Without one, it carries no program, a
# score of 0.0 and an error note in the metadata.
def build_optimization_result(best_trial, all_trials)
  unless best_trial
    # No trial completed, so there is no program to report. The error text
    # is kept as-is because callers may match on it.
    return OptimizationResult.new(
      optimized_program: nil,
      scores: { pass_rate: 0.0 },
      history: { total_trials: all_trials.size, successful_trials: 0 },
      best_score_name: "pass_rate",
      best_score_value: 0.0,
      metadata: { optimizer: "SimpleOptimizer", error: "No successful trials" }
    )
  end

  history = {
    total_trials: all_trials.size,
    successful_trials: all_trials.count(&:successful?),
    trial_scores: all_trials.map(&:score),
    best_trial_number: best_trial.trial_number
  }

  OptimizationResult.new(
    optimized_program: best_trial.program,
    scores: { pass_rate: best_trial.score },
    history: history,
    best_score_name: "pass_rate",
    best_score_value: best_trial.score,
    metadata: {
      optimizer: "SimpleOptimizer",
      search_strategy: @optimizer_config.search_strategy,
      num_trials: @optimizer_config.num_trials,
      best_instruction: best_trial.instruction,
      best_num_few_shot: best_trial.few_shot_examples.size,
      optimization_timestamp: Time.now.iso8601
    }
  )
end
446
+
447
+ # Helper methods for program introspection
448
+ sig { params(program: T.untyped).returns(T.nilable(String)) }
449
# Returns the program's current instruction, or nil when none can be found.
#
# Prefers program.prompt.instruction. Failing that, uses
# program.system_signature, but only when it is a String.
def extract_current_instruction(program)
  if program.respond_to?(:prompt)
    # Read the prompt once so both the check and the call see the same object.
    prompt = program.prompt
    return prompt.instruction if prompt.respond_to?(:instruction)
  end

  return nil unless program.respond_to?(:system_signature)

  system_sig = program.system_signature
  system_sig.is_a?(String) ? system_sig : nil
end
460
+
461
+ sig { params(program: T.untyped).returns(T.nilable(T.class_of(DSPy::Signature))) }
462
# Returns program.signature_class when the program exposes it, else nil.
def extract_signature_class(program)
  program.signature_class if program.respond_to?(:signature_class)
end
469
+
470
+ sig { params(program: T.untyped).returns(T::Boolean) }
471
# True when the program can produce a copy with a new instruction, i.e. it
# responds to with_instruction.
def respond_to_instruction_modification?(program)
  program.respond_to?(:with_instruction)
end
474
+
475
+ sig { params(program: T.untyped).returns(T::Boolean) }
476
# True when the program can produce a copy with few-shot examples, i.e. it
# responds to with_examples.
def respond_to_few_shot_modification?(program)
  program.respond_to?(:with_examples)
end
479
+
480
+ sig { params(example: T.untyped).returns(T.nilable(String)) }
481
# Returns the reasoning text stored with an example, or nil.
#
# Only DSPy::Example instances are inspected. The value under :reasoning is
# used when that key is present in expected_values (even if its value is
# nil); otherwise the value under :explanation is used when present.
def extract_reasoning_from_example(example)
  return nil unless example.is_a?(DSPy::Example)

  expected = example.expected_values
  # Key presence, not value truthiness, decides which entry is used.
  reasoning_key = %i[reasoning explanation].find { |key| expected.key?(key) }
  reasoning_key && expected[reasoning_key]
end
495
+ end
496
+ end
497
+ end