dspy 0.28.2 → 0.29.0

This diff compares publicly released versions of the package as they appear in their public registries and is provided for informational purposes only.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/README.md +2 -3
  3. data/lib/dspy/code_act.rb +14 -1
  4. data/lib/dspy/datasets/ade.rb +90 -0
  5. data/lib/dspy/datasets.rb +8 -0
  6. data/lib/dspy/lm.rb +4 -8
  7. data/lib/dspy/mixins/struct_builder.rb +17 -25
  8. data/lib/dspy/module.rb +12 -1
  9. data/lib/dspy/observability/async_span_processor.rb +67 -93
  10. data/lib/dspy/observability.rb +43 -1
  11. data/lib/dspy/predict.rb +10 -0
  12. data/lib/dspy/propose/dataset_summary_generator.rb +36 -3
  13. data/lib/dspy/propose/grounded_proposer.rb +118 -11
  14. data/lib/dspy/re_act.rb +13 -0
  15. data/lib/dspy/reflection_lm.rb +36 -0
  16. data/lib/dspy/teleprompt/gepa.rb +448 -2803
  17. data/lib/dspy/teleprompt/mipro_v2.rb +564 -65
  18. data/lib/dspy/teleprompt/utils.rb +8 -3
  19. data/lib/dspy/version.rb +2 -2
  20. data/lib/dspy.rb +3 -2
  21. data/lib/gepa/api.rb +61 -0
  22. data/lib/gepa/core/engine.rb +226 -0
  23. data/lib/gepa/core/evaluation_batch.rb +26 -0
  24. data/lib/gepa/core/result.rb +92 -0
  25. data/lib/gepa/core/state.rb +231 -0
  26. data/lib/gepa/logging/experiment_tracker.rb +54 -0
  27. data/lib/gepa/logging/logger.rb +57 -0
  28. data/lib/gepa/logging.rb +9 -0
  29. data/lib/gepa/proposer/base.rb +27 -0
  30. data/lib/gepa/proposer/merge_proposer.rb +424 -0
  31. data/lib/gepa/proposer/reflective_mutation/base.rb +48 -0
  32. data/lib/gepa/proposer/reflective_mutation/reflective_mutation.rb +188 -0
  33. data/lib/gepa/strategies/batch_sampler.rb +91 -0
  34. data/lib/gepa/strategies/candidate_selector.rb +97 -0
  35. data/lib/gepa/strategies/component_selector.rb +57 -0
  36. data/lib/gepa/strategies/instruction_proposal.rb +120 -0
  37. data/lib/gepa/telemetry.rb +122 -0
  38. data/lib/gepa/utils/pareto.rb +119 -0
  39. data/lib/gepa.rb +21 -0
  40. metadata +42 -4
  41. data/lib/dspy/teleprompt/simple_optimizer.rb +0 -503
@@ -1,503 +0,0 @@
- # frozen_string_literal: true
-
- require 'sorbet-runtime'
- require_relative 'teleprompter'
- require_relative 'utils'
- require_relative '../propose/grounded_proposer'
-
- module DSPy
-   module Teleprompt
-     # Simple optimization algorithm using random/grid search
-     # Uses grounded proposer for instruction generation and bootstrap for examples
-     class SimpleOptimizer < Teleprompter
-       extend T::Sig
-
-       # Configuration specific to simple optimization
-       class OptimizerConfig < Config
-         extend T::Sig
-
-         sig { returns(Integer) }
-         attr_accessor :num_trials
-
-         sig { returns(String) }
-         attr_accessor :search_strategy
-
-         sig { returns(T::Boolean) }
-         attr_accessor :use_instruction_optimization
-
-         sig { returns(T::Boolean) }
-         attr_accessor :use_few_shot_optimization
-
-         sig { returns(DSPy::Propose::GroundedProposer::Config) }
-         attr_accessor :proposer_config
-
-         sig { void }
-         def initialize
-           super
-           @num_trials = 10
-           @search_strategy = "random" # or "grid"
-           @use_instruction_optimization = true
-           @use_few_shot_optimization = true
-           @proposer_config = DSPy::Propose::GroundedProposer::Config.new
-         end
-       end
-
-       # Result of a single optimization trial
-       class TrialResult
-         extend T::Sig
-
-         sig { returns(Integer) }
-         attr_reader :trial_number
-
-         sig { returns(T.untyped) }
-         attr_reader :program
-
-         sig { returns(String) }
-         attr_reader :instruction
-
-         sig { returns(T::Array[T.untyped]) }
-         attr_reader :few_shot_examples
-
-         sig { returns(DSPy::Evaluate::BatchEvaluationResult) }
-         attr_reader :evaluation_result
-
-         sig { returns(Float) }
-         attr_reader :score
-
-         sig { returns(T::Hash[Symbol, T.untyped]) }
-         attr_reader :metadata
-
-         sig do
-           params(
-             trial_number: Integer,
-             program: T.untyped,
-             instruction: String,
-             few_shot_examples: T::Array[T.untyped],
-             evaluation_result: DSPy::Evaluate::BatchEvaluationResult,
-             score: Float,
-             metadata: T::Hash[Symbol, T.untyped]
-           ).void
-         end
-         def initialize(trial_number:, program:, instruction:, few_shot_examples:, evaluation_result:, score:, metadata:)
-           @trial_number = trial_number
-           @program = program
-           @instruction = instruction
-           @few_shot_examples = few_shot_examples
-           @evaluation_result = evaluation_result
-           @score = score
-           @metadata = metadata.freeze
-         end
-
-         sig { returns(T::Boolean) }
-         def successful?
-           @score > 0.0
-         end
-       end
-
-       sig { returns(OptimizerConfig) }
-       attr_reader :optimizer_config
-
-       sig { returns(T.nilable(DSPy::Propose::GroundedProposer)) }
-       attr_reader :proposer
-
-       sig do
-         params(
-           metric: T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T.untyped)),
-           config: T.nilable(OptimizerConfig)
-         ).void
-       end
-       def initialize(metric: nil, config: nil)
-         @optimizer_config = config || OptimizerConfig.new
-         super(metric: metric, config: @optimizer_config)
-
-         @proposer = if @optimizer_config.use_instruction_optimization
-           DSPy::Propose::GroundedProposer.new(config: @optimizer_config.proposer_config)
-         else
-           nil
-         end
-       end
-
-       # Main optimization method
-       sig do
-         params(
-           program: T.untyped,
-           trainset: T::Array[T.untyped],
-           valset: T.nilable(T::Array[T.untyped])
-         ).returns(OptimizationResult)
-       end
-       def compile(program, trainset:, valset: nil)
-         validate_inputs(program, trainset, valset)
-
-         instrument_step('compile', {
-           trainset_size: trainset.size,
-           valset_size: valset&.size || 0,
-           num_trials: @optimizer_config.num_trials,
-           search_strategy: @optimizer_config.search_strategy
-         }) do
-           # Convert examples to typed format
-           typed_trainset = ensure_typed_examples(trainset)
-           typed_valset = valset ? ensure_typed_examples(valset) : nil
-
-           # Use validation set if available, otherwise use part of training set
-           evaluation_set = typed_valset || typed_trainset.take(10)
-
-           # Bootstrap few-shot examples if enabled
-           demo_candidates = nil
-           if @optimizer_config.use_few_shot_optimization
-             demo_candidates = bootstrap_examples(program, typed_trainset)
-           end
-
-           # Generate instruction candidates if enabled
-           instruction_candidates = []
-           if @optimizer_config.use_instruction_optimization && @proposer
-             instruction_candidates = generate_instruction_candidates(program, typed_trainset, demo_candidates)
-           end
-
-           # Run optimization trials
-           trials = run_optimization_trials(
-             program,
-             evaluation_set,
-             instruction_candidates,
-             demo_candidates
-           )
-
-           # Find best trial
-           best_trial = find_best_trial(trials)
-
-           # Build optimization result
-           optimization_result = build_optimization_result(best_trial, trials)
-
-           save_results(optimization_result)
-           optimization_result
-         end
-       end
-
-       private
-
-       # Bootstrap few-shot examples from training set
-       sig { params(program: T.untyped, trainset: T::Array[DSPy::Example]).returns(T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]]) }
-       def bootstrap_examples(program, trainset)
-         num_candidate_sets = [@optimizer_config.num_trials / 2, 5].max
-
-         Utils.create_n_fewshot_demo_sets(
-           program,
-           num_candidate_sets,
-           trainset,
-           max_bootstrapped_demos: @optimizer_config.max_bootstrapped_examples,
-           max_labeled_demos: @optimizer_config.max_labeled_examples,
-           metric: @metric
-         )
-       end
-
-       # Generate instruction candidates using the proposer
-       sig do
-         params(
-           program: T.untyped,
-           trainset: T::Array[DSPy::Example],
-           demo_candidates: T.nilable(T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]])
-         ).returns(T::Array[String])
-       end
-       def generate_instruction_candidates(program, trainset, demo_candidates)
-         return [] unless @proposer
-
-         # Get current instruction if available
-         current_instruction = extract_current_instruction(program)
-
-         # Use few-shot examples from bootstrap if available
-         # Flatten demo sets from first predictor and take first 5 examples
-         few_shot_examples = demo_candidates&.dig(0)&.flatten&.take(5) || []
-
-         # Get signature class from program
-         signature_class = extract_signature_class(program)
-         return [] unless signature_class
-
-         proposal_result = @proposer.propose_instructions(
-           signature_class,
-           trainset,
-           few_shot_examples: few_shot_examples,
-           current_instruction: current_instruction
-         )
-
-         proposal_result.candidate_instructions
-       end
-
-       # Run optimization trials with different configurations
-       sig do
-         params(
-           program: T.untyped,
-           evaluation_set: T::Array[DSPy::Example],
-           instruction_candidates: T::Array[String],
-           demo_candidates: T.nilable(T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]])
-         ).returns(T::Array[TrialResult])
-       end
-       def run_optimization_trials(program, evaluation_set, instruction_candidates, demo_candidates)
-         trials = []
-
-         # Generate trial configurations
-         trial_configs = generate_trial_configurations(instruction_candidates, demo_candidates)
-
-         trial_configs.take(@optimizer_config.num_trials).each_with_index do |config, index|
-           trial_number = index + 1
-
-           emit_event('trial_start', {
-             trial_number: trial_number,
-             instruction: config[:instruction],
-             num_few_shot: config[:few_shot_examples]&.size || 0
-           })
-
-           begin
-             trial_result = run_single_trial(program, evaluation_set, config, trial_number)
-             trials << trial_result
-
-             emit_event('trial_complete', {
-               trial_number: trial_number,
-               score: trial_result.score,
-               successful: trial_result.successful?,
-               duration_ms: trial_result.metadata[:duration_ms] || 0
-             })
-           rescue => error
-             emit_event('error', {
-               trial_number: trial_number,
-               error_type: error.class.name,
-               error_message: error.message
-             })
-
-             DSPy.logger.error("Trial #{trial_number} failed: #{error.message}")
-           end
-         end
-
-         trials
-       end
-
-       # Generate configurations for trials
-       sig do
-         params(
-           instruction_candidates: T::Array[String],
-           demo_candidates: T.nilable(T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]])
-         ).returns(T::Array[T::Hash[Symbol, T.untyped]])
-       end
-       def generate_trial_configurations(instruction_candidates, demo_candidates)
-         configs = []
-
-         # Extract demo sets from first predictor
-         demo_sets = demo_candidates&.dig(0) || []
-
-         # Base configuration (no changes)
-         configs << { instruction: nil, few_shot_examples: [] }
-
-         # Instruction-only trials
-         instruction_candidates.each do |instruction|
-           configs << { instruction: instruction, few_shot_examples: [] }
-         end
-
-         # Few-shot only trials
-         if demo_sets.any?
-           demo_sets.each do |demo_set|
-             configs << { instruction: nil, few_shot_examples: demo_set }
-           end
-         end
-
-         # Combined instruction + few-shot trials
-         if instruction_candidates.any? && demo_sets.any?
-           instruction_candidates.take(3).each do |instruction|
-             demo_sets.take(2).each do |demo_set|
-               configs << { instruction: instruction, few_shot_examples: demo_set }
-             end
-           end
-         end
-
-         # Shuffle for random strategy
-         if @optimizer_config.search_strategy == "random"
-           configs.shuffle
-         else
-           configs
-         end
-       end
-
-       # Run a single optimization trial
-       sig do
-         params(
-           program: T.untyped,
-           evaluation_set: T::Array[DSPy::Example],
-           config: T::Hash[Symbol, T.untyped],
-           trial_number: Integer
-         ).returns(TrialResult)
-       end
-       def run_single_trial(program, evaluation_set, config, trial_number)
-         start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
-
-         # Create modified program
-         modified_program = apply_trial_configuration(program, config)
-
-         # Evaluate the modified program
-         evaluation_result = evaluate_program(modified_program, evaluation_set)
-
-         # Calculate score (using pass_rate as primary metric)
-         score = evaluation_result.pass_rate
-
-         end_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
-         duration_ms = ((end_time - start_time) * 1000).round(2)
-
-         metadata = {
-           duration_ms: duration_ms,
-           num_examples_evaluated: evaluation_result.total_examples,
-           instruction_length: config[:instruction]&.length || 0,
-           num_few_shot_examples: config[:few_shot_examples]&.size || 0
-         }
-
-         TrialResult.new(
-           trial_number: trial_number,
-           program: modified_program,
-           instruction: config[:instruction] || "",
-           few_shot_examples: config[:few_shot_examples] || [],
-           evaluation_result: evaluation_result,
-           score: score,
-           metadata: metadata
-         )
-       end
-
-       # Apply trial configuration to program
-       sig { params(program: T.untyped, config: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
-       def apply_trial_configuration(program, config)
-         modified_program = program
-
-         # Apply instruction modification
-         if config[:instruction] && respond_to_instruction_modification?(program)
-           modified_program = apply_instruction_modification(modified_program, config[:instruction])
-         end
-
-         # Apply few-shot examples
-         if config[:few_shot_examples]&.any? && respond_to_few_shot_modification?(program)
-           modified_program = apply_few_shot_modification(modified_program, config[:few_shot_examples])
-         end
-
-         modified_program
-       end
-
-       # Apply instruction modification to program
-       sig { params(program: T.untyped, instruction: String).returns(T.untyped) }
-       def apply_instruction_modification(program, instruction)
-         if program.respond_to?(:with_instruction)
-           program.with_instruction(instruction)
-         else
-           program
-         end
-       end
-
-       # Apply few-shot examples to program
-       sig { params(program: T.untyped, examples: T::Array[T.untyped]).returns(T.untyped) }
-       def apply_few_shot_modification(program, examples)
-         if program.respond_to?(:with_examples)
-           # Convert to FewShotExample format
-           few_shot_examples = examples.map do |example|
-             DSPy::FewShotExample.new(
-               input: example.input_values,
-               output: example.expected_values,
-               reasoning: extract_reasoning_from_example(example)
-             )
-           end
-           program.with_examples(few_shot_examples)
-         else
-           program
-         end
-       end
-
-       # Find the best trial based on score
-       sig { params(trials: T::Array[TrialResult]).returns(T.nilable(TrialResult)) }
-       def find_best_trial(trials)
-         return nil if trials.empty?
-
-         trials.max_by(&:score)
-       end
-
-       # Build the final optimization result
-       sig { params(best_trial: T.nilable(TrialResult), all_trials: T::Array[TrialResult]).returns(OptimizationResult) }
-       def build_optimization_result(best_trial, all_trials)
-         if best_trial
-           scores = { pass_rate: best_trial.score }
-           history = {
-             total_trials: all_trials.size,
-             successful_trials: all_trials.count(&:successful?),
-             trial_scores: all_trials.map(&:score),
-             best_trial_number: best_trial.trial_number
-           }
-
-           OptimizationResult.new(
-             optimized_program: best_trial.program,
-             scores: scores,
-             history: history,
-             best_score_name: "pass_rate",
-             best_score_value: best_trial.score,
-             metadata: {
-               optimizer: "SimpleOptimizer",
-               search_strategy: @optimizer_config.search_strategy,
-               num_trials: @optimizer_config.num_trials,
-               best_instruction: best_trial.instruction,
-               best_num_few_shot: best_trial.few_shot_examples.size,
-               optimization_timestamp: Time.now.iso8601
-             }
-           )
-         else
-           # No successful trials
-           OptimizationResult.new(
-             optimized_program: nil,
-             scores: { pass_rate: 0.0 },
-             history: { total_trials: all_trials.size, successful_trials: 0 },
-             best_score_name: "pass_rate",
-             best_score_value: 0.0,
-             metadata: { optimizer: "SimpleOptimizer", error: "No successful trials" }
-           )
-         end
-       end
-
-       # Helper methods for program introspection
-       sig { params(program: T.untyped).returns(T.nilable(String)) }
-       def extract_current_instruction(program)
-         if program.respond_to?(:prompt) && program.prompt.respond_to?(:instruction)
-           program.prompt.instruction
-         elsif program.respond_to?(:system_signature)
-           # Try to extract from system signature
-           system_sig = program.system_signature
-           system_sig.is_a?(String) ? system_sig : nil
-         else
-           nil
-         end
-       end
-
-       sig { params(program: T.untyped).returns(T.nilable(T.class_of(DSPy::Signature))) }
-       def extract_signature_class(program)
-         if program.respond_to?(:signature_class)
-           program.signature_class
-         else
-           nil
-         end
-       end
-
-       sig { params(program: T.untyped).returns(T::Boolean) }
-       def respond_to_instruction_modification?(program)
-         program.respond_to?(:with_instruction)
-       end
-
-       sig { params(program: T.untyped).returns(T::Boolean) }
-       def respond_to_few_shot_modification?(program)
-         program.respond_to?(:with_examples)
-       end
-
-       sig { params(example: T.untyped).returns(T.nilable(String)) }
-       def extract_reasoning_from_example(example)
-         case example
-         when DSPy::Example
-           if example.expected_values.key?(:reasoning)
-             example.expected_values[:reasoning]
-           elsif example.expected_values.key?(:explanation)
-             example.expected_values[:explanation]
-           else
-             nil
-           end
-         else
-           nil
-         end
-       end
-     end
-   end
- end
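
Migration note: the removed SimpleOptimizer was driven through its OptimizerConfig and the compile(program, trainset:, valset:) entry point shown in the diff above. The sketch below is reconstructed from those deleted signatures only and is not part of the 0.29.0 API; qa_program, train_examples, val_examples, and the metric body are hypothetical placeholders. Code that still references SimpleOptimizer will need to move to an optimizer that remains in this release, such as MIPROv2 (data/lib/dspy/teleprompt/mipro_v2.rb) or GEPA (data/lib/dspy/teleprompt/gepa.rb, now backed by the extracted data/lib/gepa/ engine).

    # Hedged sketch: how the removed SimpleOptimizer was typically invoked in 0.28.x,
    # reconstructed from the signatures in the deleted file above.
    config = DSPy::Teleprompt::SimpleOptimizer::OptimizerConfig.new
    config.num_trials = 20            # defaulted to 10
    config.search_strategy = "grid"   # defaulted to "random"

    # Metric receives (example, prediction) and returns a truthy value or score.
    # The comparison below is a hypothetical placeholder, not the library's metric.
    metric = ->(example, prediction) { example.expected_values[:answer] == prediction.answer }

    optimizer = DSPy::Teleprompt::SimpleOptimizer.new(metric: metric, config: config)
    result = optimizer.compile(qa_program, trainset: train_examples, valset: val_examples)

    result.best_score_value   # best pass_rate across trials
    result.optimized_program  # program with the winning instruction / few-shot set applied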