dspy 0.29.0 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/LICENSE +45 -0
  3. data/README.md +121 -101
  4. data/lib/dspy/callbacks.rb +74 -19
  5. data/lib/dspy/context.rb +49 -4
  6. data/lib/dspy/errors.rb +19 -1
  7. data/lib/dspy/{datasets.rb → evals/version.rb} +2 -3
  8. data/lib/dspy/{evaluate.rb → evals.rb} +373 -110
  9. data/lib/dspy/mixins/instruction_updatable.rb +22 -0
  10. data/lib/dspy/observability.rb +40 -182
  11. data/lib/dspy/predict.rb +10 -2
  12. data/lib/dspy/propose/dataset_summary_generator.rb +28 -18
  13. data/lib/dspy/re_act.rb +21 -0
  14. data/lib/dspy/schema/sorbet_json_schema.rb +302 -0
  15. data/lib/dspy/schema/version.rb +7 -0
  16. data/lib/dspy/schema.rb +4 -0
  17. data/lib/dspy/structured_outputs_prompt.rb +48 -0
  18. data/lib/dspy/support/warning_filters.rb +27 -0
  19. data/lib/dspy/teleprompt/gepa.rb +9 -588
  20. data/lib/dspy/teleprompt/instruction_updates.rb +94 -0
  21. data/lib/dspy/teleprompt/teleprompter.rb +6 -6
  22. data/lib/dspy/teleprompt/utils.rb +5 -65
  23. data/lib/dspy/type_system/sorbet_json_schema.rb +2 -299
  24. data/lib/dspy/version.rb +1 -1
  25. data/lib/dspy.rb +33 -7
  26. metadata +14 -60
  27. data/lib/dspy/code_act.rb +0 -477
  28. data/lib/dspy/datasets/ade.rb +0 -90
  29. data/lib/dspy/observability/async_span_processor.rb +0 -250
  30. data/lib/dspy/observability/observation_type.rb +0 -65
  31. data/lib/dspy/optimizers/gaussian_process.rb +0 -141
  32. data/lib/dspy/teleprompt/mipro_v2.rb +0 -1423
  33. data/lib/gepa/api.rb +0 -61
  34. data/lib/gepa/core/engine.rb +0 -226
  35. data/lib/gepa/core/evaluation_batch.rb +0 -26
  36. data/lib/gepa/core/result.rb +0 -92
  37. data/lib/gepa/core/state.rb +0 -231
  38. data/lib/gepa/logging/experiment_tracker.rb +0 -54
  39. data/lib/gepa/logging/logger.rb +0 -57
  40. data/lib/gepa/logging.rb +0 -9
  41. data/lib/gepa/proposer/base.rb +0 -27
  42. data/lib/gepa/proposer/merge_proposer.rb +0 -424
  43. data/lib/gepa/proposer/reflective_mutation/base.rb +0 -48
  44. data/lib/gepa/proposer/reflective_mutation/reflective_mutation.rb +0 -188
  45. data/lib/gepa/strategies/batch_sampler.rb +0 -91
  46. data/lib/gepa/strategies/candidate_selector.rb +0 -97
  47. data/lib/gepa/strategies/component_selector.rb +0 -57
  48. data/lib/gepa/strategies/instruction_proposal.rb +0 -120
  49. data/lib/gepa/telemetry.rb +0 -122
  50. data/lib/gepa/utils/pareto.rb +0 -119
  51. data/lib/gepa.rb +0 -21
@@ -1,1423 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'digest'
4
- require 'time'
5
- require 'concurrent-ruby'
6
- require 'sorbet-runtime'
7
- require 'securerandom'
8
- require_relative 'teleprompter'
9
- require_relative 'utils'
10
- require_relative '../propose/grounded_proposer'
11
- require_relative '../optimizers/gaussian_process'
12
-
13
- module DSPy
14
- module Teleprompt
15
- # Enum for candidate configuration types
16
- class CandidateType < T::Enum
17
- enums do
18
- Baseline = new("baseline")
19
- InstructionOnly = new("instruction_only")
20
- FewShotOnly = new("few_shot_only")
21
- Combined = new("combined")
22
- end
23
- end
24
-
25
- # Enum for optimization strategies
26
- class OptimizationStrategy < T::Enum
27
- enums do
28
- Greedy = new("greedy")
29
- Adaptive = new("adaptive")
30
- Bayesian = new("bayesian")
31
- end
32
- end
33
- # MIPROv2: Multi-prompt Instruction Proposal with Retrieval Optimization
34
- # State-of-the-art prompt optimization combining bootstrap sampling,
35
- # instruction generation, and Bayesian optimization
36
- class MIPROv2 < Teleprompter
37
- extend T::Sig
38
- include Dry::Configurable
39
-
40
- # Auto-configuration modes for different optimization needs
41
- module AutoMode
42
- extend T::Sig
43
-
44
- sig do
45
- params(
46
- metric: T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T.untyped)),
47
- kwargs: T.untyped
48
- ).returns(MIPROv2)
49
- end
50
- def self.light(metric: nil, **kwargs)
51
- optimizer = MIPROv2.new(metric: metric, **kwargs)
52
- optimizer.configure do |config|
53
- config.num_trials = 6
54
- config.num_instruction_candidates = 3
55
- config.max_bootstrapped_examples = 2
56
- config.max_labeled_examples = 8
57
- config.bootstrap_sets = 3
58
- config.optimization_strategy = :greedy
59
- config.early_stopping_patience = 2
60
- end
61
- optimizer
62
- end
63
-
64
- sig do
65
- params(
66
- metric: T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T.untyped)),
67
- kwargs: T.untyped
68
- ).returns(MIPROv2)
69
- end
70
- def self.medium(metric: nil, **kwargs)
71
- optimizer = MIPROv2.new(metric: metric, **kwargs)
72
- optimizer.configure do |config|
73
- config.num_trials = 12
74
- config.num_instruction_candidates = 5
75
- config.max_bootstrapped_examples = 4
76
- config.max_labeled_examples = 16
77
- config.bootstrap_sets = 5
78
- config.optimization_strategy = :adaptive
79
- config.early_stopping_patience = 3
80
- end
81
- optimizer
82
- end
83
-
84
- sig do
85
- params(
86
- metric: T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T.untyped)),
87
- kwargs: T.untyped
88
- ).returns(MIPROv2)
89
- end
90
- def self.heavy(metric: nil, **kwargs)
91
- optimizer = MIPROv2.new(metric: metric, **kwargs)
92
- optimizer.configure do |config|
93
- config.num_trials = 18
94
- config.num_instruction_candidates = 8
95
- config.max_bootstrapped_examples = 6
96
- config.max_labeled_examples = 24
97
- config.bootstrap_sets = 8
98
- config.optimization_strategy = :bayesian
99
- config.early_stopping_patience = 5
100
- end
101
- optimizer
102
- end
103
- end
104
-
105
- # Dry-configurable settings for MIPROv2
106
- setting :num_trials, default: 12
107
- setting :num_instruction_candidates, default: 5
108
- setting :bootstrap_sets, default: 5
109
- setting :max_bootstrapped_examples, default: 4
110
- setting :max_labeled_examples, default: 16
111
- setting :optimization_strategy, default: OptimizationStrategy::Adaptive, constructor: ->(value) {
112
- # Coerce symbols to enum values
113
- case value
114
- when :greedy then OptimizationStrategy::Greedy
115
- when :adaptive then OptimizationStrategy::Adaptive
116
- when :bayesian then OptimizationStrategy::Bayesian
117
- when OptimizationStrategy then value
118
- when nil then OptimizationStrategy::Adaptive
119
- else
120
- raise ArgumentError, "Invalid optimization strategy: #{value}. Must be one of :greedy, :adaptive, :bayesian"
121
- end
122
- }
123
- setting :init_temperature, default: 1.0
124
- setting :final_temperature, default: 0.1
125
- setting :early_stopping_patience, default: 3
126
- setting :use_bayesian_optimization, default: true
127
- setting :track_diversity, default: true
128
- setting :max_errors, default: 3
129
- setting :num_threads, default: 1
130
- setting :minibatch_size, default: nil
131
-
132
- # Class-level configuration method - sets defaults for new instances
133
- def self.configure(&block)
134
- if block_given?
135
- # Store configuration in a class variable for new instances
136
- @default_config_block = block
137
- end
138
- end
139
-
140
- # Get the default configuration block
141
- def self.default_config_block
142
- @default_config_block
143
- end
144
-
145
-
146
- # Simple data structure for evaluated candidate configurations (immutable)
147
- EvaluatedCandidate = Data.define(
148
- :instruction,
149
- :few_shot_examples,
150
- :type,
151
- :metadata,
152
- :config_id
153
- ) do
154
- extend T::Sig
155
-
156
- # Generate a config ID based on content
157
- sig { params(instruction: String, few_shot_examples: T::Array[T.untyped], type: CandidateType, metadata: T::Hash[Symbol, T.untyped]).returns(EvaluatedCandidate) }
158
- def self.create(instruction:, few_shot_examples: [], type: CandidateType::Baseline, metadata: {})
159
- content = "#{instruction}_#{few_shot_examples.size}_#{type.serialize}_#{metadata.hash}"
160
- config_id = Digest::SHA256.hexdigest(content)[0, 12]
161
-
162
- new(
163
- instruction: instruction.freeze,
164
- few_shot_examples: few_shot_examples.freeze,
165
- type: type,
166
- metadata: metadata.freeze,
167
- config_id: config_id
168
- )
169
- end
170
-
171
- sig { returns(T::Hash[Symbol, T.untyped]) }
172
- def to_h
173
- {
174
- instruction: instruction,
175
- few_shot_examples: few_shot_examples.size,
176
- type: type.serialize,
177
- metadata: metadata,
178
- config_id: config_id
179
- }
180
- end
181
- end
182
-
183
- # Result of MIPROv2 optimization
184
- class MIPROv2Result < OptimizationResult
185
- extend T::Sig
186
-
187
- sig { returns(T::Array[EvaluatedCandidate]) }
188
- attr_reader :evaluated_candidates
189
-
190
- sig { returns(T::Hash[Symbol, T.untyped]) }
191
- attr_reader :optimization_trace
192
-
193
- sig { returns(T::Hash[Symbol, T.untyped]) }
194
- attr_reader :bootstrap_statistics
195
-
196
- sig { returns(T::Hash[Symbol, T.untyped]) }
197
- attr_reader :proposal_statistics
198
-
199
- sig { returns(T.nilable(DSPy::Evaluate::BatchEvaluationResult)) }
200
- attr_reader :best_evaluation_result
201
-
202
- sig do
203
- params(
204
- optimized_program: T.untyped,
205
- scores: T::Hash[Symbol, T.untyped],
206
- history: T::Hash[Symbol, T.untyped],
207
- evaluated_candidates: T::Array[EvaluatedCandidate],
208
- optimization_trace: T::Hash[Symbol, T.untyped],
209
- bootstrap_statistics: T::Hash[Symbol, T.untyped],
210
- proposal_statistics: T::Hash[Symbol, T.untyped],
211
- best_score_name: T.nilable(String),
212
- best_score_value: T.nilable(Float),
213
- metadata: T::Hash[Symbol, T.untyped],
214
- best_evaluation_result: T.nilable(DSPy::Evaluate::BatchEvaluationResult)
215
- ).void
216
- end
217
- def initialize(optimized_program:, scores:, history:, evaluated_candidates:, optimization_trace:, bootstrap_statistics:, proposal_statistics:, best_score_name: nil, best_score_value: nil, metadata: {}, best_evaluation_result: nil)
218
- super(
219
- optimized_program: optimized_program,
220
- scores: scores,
221
- history: history,
222
- best_score_name: best_score_name,
223
- best_score_value: best_score_value,
224
- metadata: metadata
225
- )
226
- @evaluated_candidates = evaluated_candidates.freeze
227
- @optimization_trace = optimization_trace.freeze
228
- @bootstrap_statistics = bootstrap_statistics.freeze
229
- @proposal_statistics = proposal_statistics.freeze
230
- @best_evaluation_result = best_evaluation_result&.freeze
231
- end
232
-
233
- sig { returns(T::Hash[Symbol, T.untyped]) }
234
- def to_h
235
- super.merge({
236
- evaluated_candidates: @evaluated_candidates.map(&:to_h),
237
- optimization_trace: @optimization_trace,
238
- bootstrap_statistics: @bootstrap_statistics,
239
- proposal_statistics: @proposal_statistics,
240
- best_evaluation_result: @best_evaluation_result&.to_h
241
- })
242
- end
243
- end
244
-
245
- sig { returns(MIPROv2Config) }
246
- attr_reader :mipro_config
247
-
248
- sig { returns(T.nilable(DSPy::Propose::GroundedProposer)) }
249
- attr_reader :proposer
250
-
251
- # Override dry-configurable's initialize to add our parameter validation
252
- def initialize(metric: nil, **kwargs)
253
- # Reject old config parameter pattern
254
- if kwargs.key?(:config)
255
- raise ArgumentError, "config parameter is no longer supported. Use .configure blocks instead."
256
- end
257
-
258
- # Let dry-configurable handle its initialization
259
- super(**kwargs)
260
-
261
- # Apply class-level configuration if it exists
262
- if self.class.default_config_block
263
- configure(&self.class.default_config_block)
264
- end
265
-
266
- @metric = metric
267
-
268
- # Initialize proposer with a basic config for now (will be updated later)
269
- @proposer = DSPy::Propose::GroundedProposer.new(config: DSPy::Propose::GroundedProposer::Config.new)
270
- @optimization_trace = []
271
- @evaluated_candidates = []
272
- @trial_history = {}
273
- end
274
-
275
- # Main MIPROv2 optimization method
276
- sig do
277
- params(
278
- program: T.untyped,
279
- trainset: T::Array[T.untyped],
280
- valset: T.nilable(T::Array[T.untyped])
281
- ).returns(MIPROv2Result)
282
- end
283
- def compile(program, trainset:, valset: nil)
284
- validate_inputs(program, trainset, valset)
285
-
286
- instrument_step('miprov2_compile', {
287
- trainset_size: trainset.size,
288
- valset_size: valset&.size || 0,
289
- num_trials: config.num_trials,
290
- optimization_strategy: optimization_strategy_name,
291
- mode: infer_auto_mode
292
- }) do
293
- # Convert examples to typed format
294
- typed_trainset = ensure_typed_examples(trainset)
295
- typed_valset = valset ? ensure_typed_examples(valset) : nil
296
-
297
- # Use validation set if available, otherwise use part of training set
298
- evaluation_set = typed_valset || typed_trainset.take([typed_trainset.size / 3, 10].max)
299
-
300
- # Phase 1: Bootstrap few-shot examples
301
- emit_event('phase_start', { phase: 1, name: 'bootstrap' })
302
- demo_candidates = phase_1_bootstrap(program, typed_trainset)
303
- emit_event('phase_complete', {
304
- phase: 1,
305
- num_predictors: demo_candidates.keys.size,
306
- demo_sets_per_predictor: demo_candidates[0]&.size || 0
307
- })
308
-
309
- # Phase 2: Generate instruction candidates
310
- emit_event('phase_start', { phase: 2, name: 'instruction_proposal' })
311
- proposal_result = phase_2_propose_instructions(program, typed_trainset, demo_candidates)
312
- emit_event('phase_complete', {
313
- phase: 2,
314
- num_candidates: proposal_result.num_candidates,
315
- best_instruction_preview: proposal_result.best_instruction[0, 50]
316
- })
317
-
318
- # Phase 3: Bayesian optimization
319
- emit_event('phase_start', { phase: 3, name: 'optimization' })
320
- optimization_result = phase_3_optimize(
321
- program,
322
- evaluation_set,
323
- proposal_result,
324
- demo_candidates
325
- )
326
- emit_event('phase_complete', {
327
- phase: 3,
328
- best_score: optimization_result[:best_score],
329
- trials_completed: optimization_result[:trials_completed]
330
- })
331
-
332
- # Build final result
333
- final_result = build_miprov2_result(
334
- optimization_result,
335
- demo_candidates,
336
- proposal_result
337
- )
338
-
339
- @trial_history = optimization_result[:trial_logs] || {}
340
-
341
- save_results(final_result)
342
- final_result
343
- end
344
- end
345
-
346
- private
347
-
348
- # Phase 1: Bootstrap few-shot examples from training data
349
- # Returns a hash mapping predictor indices to arrays of demo sets
350
- sig { params(program: T.untyped, trainset: T::Array[DSPy::Example]).returns(T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]]) }
351
- def phase_1_bootstrap(program, trainset)
352
- Utils.create_n_fewshot_demo_sets(
353
- program,
354
- config.bootstrap_sets, # num_candidate_sets
355
- trainset,
356
- max_bootstrapped_demos: config.max_bootstrapped_examples,
357
- max_labeled_demos: config.max_labeled_examples,
358
- metric: @metric
359
- )
360
- end
361
-
362
- # Phase 2: Generate instruction candidates using grounded proposer
363
- sig do
364
- params(
365
- program: T.untyped,
366
- trainset: T::Array[DSPy::Example],
367
- demo_candidates: T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]]
368
- ).returns(DSPy::Propose::GroundedProposer::ProposalResult)
369
- end
370
- def phase_2_propose_instructions(program, trainset, demo_candidates)
371
- # Get current instruction if available
372
- current_instruction = extract_current_instruction(program)
373
-
374
- # Use few-shot examples from bootstrap if available
375
- # Flatten demo sets from first predictor and take first 5 examples
376
- few_shot_examples = demo_candidates[0]&.flatten&.take(5) || []
377
-
378
- # Re-initialize proposer with program and trainset for awareness features
379
- # This enables program_aware and use_dataset_summary flags to work correctly
380
- proposer_config = DSPy::Propose::GroundedProposer::Config.new
381
- proposer_config.num_instruction_candidates = config.num_instruction_candidates
382
-
383
- @proposer = DSPy::Propose::GroundedProposer.new(
384
- config: proposer_config,
385
- program: program,
386
- trainset: trainset
387
- )
388
-
389
- @proposer.propose_instructions_for_program(
390
- trainset: trainset,
391
- program: program,
392
- demo_candidates: demo_candidates,
393
- trial_logs: @trial_history,
394
- num_instruction_candidates: config.num_instruction_candidates
395
- )
396
- end
397
-
398
- # Phase 3: Bayesian optimization to find best configuration
399
- sig do
400
- params(
401
- program: T.untyped,
402
- evaluation_set: T::Array[DSPy::Example],
403
- proposal_result: DSPy::Propose::GroundedProposer::ProposalResult,
404
- demo_candidates: T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]]
405
- ).returns(T::Hash[Symbol, T.untyped])
406
- end
407
- def phase_3_optimize(program, evaluation_set, proposal_result, demo_candidates)
408
- # Generate candidate configurations
409
- candidates = generate_candidate_configurations(proposal_result, demo_candidates)
410
-
411
- # Initialize optimization state
412
- optimization_state = initialize_optimization_state(candidates)
413
-
414
- # Initialize trial tracking structures
415
- trial_logs = {}
416
- param_score_dict = Hash.new { |hash, key| hash[key] = [] }
417
- fully_evaled_param_combos = {}
418
- total_eval_calls = 0
419
-
420
- # Run optimization trials
421
- trials_completed = 0
422
- best_score = 0.0
423
- best_candidate = nil
424
- best_program = program
425
- best_evaluation_result = nil
426
-
427
- config.num_trials.times do |trial_idx|
428
- trials_completed = trial_idx + 1
429
-
430
- # Select next candidate based on optimization strategy
431
- candidate = select_next_candidate(candidates, optimization_state, trial_idx)
432
- batch_size = evaluation_set.size
433
-
434
- trial_logs[trials_completed] = create_trial_log_entry(
435
- trial_number: trials_completed,
436
- candidate: candidate,
437
- evaluation_type: :full,
438
- batch_size: batch_size
439
- )
440
-
441
- emit_event('trial_start', {
442
- trial_number: trials_completed,
443
- candidate_id: candidate.config_id,
444
- instruction_preview: candidate.instruction[0, 50],
445
- num_few_shot: candidate.few_shot_examples.size
446
- })
447
-
448
- begin
449
- # Evaluate candidate
450
- score, modified_program, evaluation_result = evaluate_candidate(program, candidate, evaluation_set)
451
- total_eval_calls += batch_size
452
-
453
- instructions_snapshot = extract_program_instructions(modified_program)
454
- trial_logs[trials_completed][:instructions] = instructions_snapshot unless instructions_snapshot.empty?
455
- trial_logs[trials_completed][:instruction] = instructions_snapshot[0] if instructions_snapshot.key?(0)
456
-
457
- # Update optimization state
458
- update_optimization_state(optimization_state, candidate, score)
459
- record_param_score(
460
- param_score_dict,
461
- candidate,
462
- score,
463
- evaluation_type: :full,
464
- instructions: instructions_snapshot
465
- )
466
- update_fully_evaled_param_combos(
467
- fully_evaled_param_combos,
468
- candidate,
469
- score,
470
- instructions: instructions_snapshot
471
- )
472
-
473
- # Track best result
474
- is_best = best_candidate.nil? || score > best_score
475
- if is_best
476
- best_score = score
477
- best_candidate = candidate
478
- best_program = modified_program
479
- best_evaluation_result = evaluation_result
480
- end
481
-
482
- finalize_trial_log_entry(
483
- trial_logs,
484
- trials_completed,
485
- score: score,
486
- evaluation_type: :full,
487
- batch_size: batch_size,
488
- total_eval_calls: total_eval_calls
489
- )
490
-
491
- emit_event('trial_complete', {
492
- trial_number: trials_completed,
493
- score: score,
494
- is_best: is_best,
495
- candidate_id: candidate.config_id
496
- })
497
-
498
- # Check early stopping
499
- if should_early_stop?(optimization_state, trial_idx)
500
- DSPy.logger.info("Early stopping at trial #{trials_completed}")
501
- break
502
- end
503
-
504
- rescue => error
505
- finalize_trial_log_entry(
506
- trial_logs,
507
- trials_completed,
508
- score: nil,
509
- evaluation_type: :full,
510
- batch_size: batch_size,
511
- total_eval_calls: total_eval_calls,
512
- error: error.message
513
- )
514
-
515
- emit_event('trial_error', {
516
- trial_number: trials_completed,
517
- error: error.message,
518
- candidate_id: candidate.config_id
519
- })
520
-
521
- DSPy.logger.warn("Trial #{trials_completed} failed: #{error.message}")
522
- end
523
- end
524
-
525
- {
526
- best_score: best_score,
527
- best_candidate: best_candidate,
528
- best_program: best_program,
529
- best_evaluation_result: best_evaluation_result,
530
- trials_completed: trials_completed,
531
- optimization_state: optimization_state,
532
- evaluated_candidates: @evaluated_candidates,
533
- trial_logs: trial_logs,
534
- param_score_dict: param_score_dict,
535
- fully_evaled_param_combos: fully_evaled_param_combos,
536
- total_eval_calls: total_eval_calls
537
- }
538
- end
539
-
540
- # Generate candidate configurations from proposals and demo candidates
541
- sig do
542
- params(
543
- proposal_result: DSPy::Propose::GroundedProposer::ProposalResult,
544
- demo_candidates: T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]]
545
- ).returns(T::Array[EvaluatedCandidate])
546
- end
547
- def generate_candidate_configurations(proposal_result, demo_candidates)
548
- candidates = []
549
-
550
- predictor_instruction_map = if proposal_result.respond_to?(:predictor_instructions) && proposal_result.predictor_instructions.any?
551
- proposal_result.predictor_instructions
552
- else
553
- { 0 => proposal_result.candidate_instructions }
554
- end
555
-
556
- instruction_maps = build_instruction_maps(predictor_instruction_map)
557
- demo_maps = build_demo_maps(demo_candidates)
558
-
559
- # Base configuration (no modifications)
560
- candidates << EvaluatedCandidate.new(
561
- instruction: "",
562
- few_shot_examples: [],
563
- type: CandidateType::Baseline,
564
- metadata: {
565
- instructions_map: {},
566
- demos_map: {}
567
- },
568
- config_id: SecureRandom.hex(6)
569
- )
570
-
571
- instruction_maps.each_with_index do |instruction_map, combo_idx|
572
- primary_instruction = instruction_map[0] || instruction_map.values.first || ""
573
- candidates << EvaluatedCandidate.new(
574
- instruction: primary_instruction,
575
- few_shot_examples: [],
576
- type: CandidateType::InstructionOnly,
577
- metadata: {
578
- proposal_rank: combo_idx,
579
- instructions_map: duplicate_instruction_map(instruction_map),
580
- demos_map: {}
581
- },
582
- config_id: SecureRandom.hex(6)
583
- )
584
- end
585
-
586
- demo_maps.each_with_index do |demo_map, idx|
587
- next if demo_map.empty?
588
-
589
- flattened_examples = demo_map.values.flatten
590
- candidates << EvaluatedCandidate.new(
591
- instruction: "",
592
- few_shot_examples: flattened_examples,
593
- type: CandidateType::FewShotOnly,
594
- metadata: {
595
- bootstrap_rank: idx,
596
- instructions_map: {},
597
- demos_map: duplicate_demo_map(demo_map)
598
- },
599
- config_id: SecureRandom.hex(6)
600
- )
601
- end
602
-
603
- # Combined candidates (instruction + few-shot)
604
- instruction_maps.each_with_index do |instruction_map, combo_idx|
605
- primary_instruction = instruction_map[0] || instruction_map.values.first || ""
606
- demo_maps.first(3).each_with_index do |demo_map, demo_idx|
607
- next if demo_map.empty?
608
-
609
- flattened_examples = demo_map.values.flatten
610
- candidates << EvaluatedCandidate.new(
611
- instruction: primary_instruction,
612
- few_shot_examples: flattened_examples,
613
- type: CandidateType::Combined,
614
- metadata: {
615
- instruction_rank: combo_idx,
616
- bootstrap_rank: demo_idx,
617
- instructions_map: duplicate_instruction_map(instruction_map),
618
- demos_map: duplicate_demo_map(demo_map)
619
- },
620
- config_id: SecureRandom.hex(6)
621
- )
622
- end
623
- end
624
-
625
- candidates
626
- end
627
-
628
- sig { params(predictor_instruction_map: T::Hash[Integer, T::Array[String]]).returns(T::Array[T::Hash[Integer, String]]) }
629
- def build_instruction_maps(predictor_instruction_map)
630
- return [{}] if predictor_instruction_map.nil? || predictor_instruction_map.empty?
631
-
632
- normalized = predictor_instruction_map.each_with_object({}) do |(index, instructions), memo|
633
- next if instructions.nil? || instructions.empty?
634
- memo[index] = instructions.take(3)
635
- end
636
-
637
- return [{}] if normalized.empty?
638
-
639
- cartesian_product(normalized)
640
- end
641
-
642
- sig do
643
- params(demo_candidates: T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]]).returns(T::Array[T::Hash[Integer, T::Array[DSPy::FewShotExample]]])
644
- end
645
- def build_demo_maps(demo_candidates)
646
- return [{}] if demo_candidates.nil? || demo_candidates.empty?
647
-
648
- normalized = demo_candidates.each_with_object({}) do |(index, sets), memo|
649
- next if sets.nil? || sets.empty?
650
- memo[index] = sets.take(3)
651
- end
652
-
653
- return [{}] if normalized.empty?
654
-
655
- cartesian_product(normalized)
656
- end
657
-
658
- sig do
659
- params(options_hash: T::Hash[Integer, T::Array[T.untyped]]).returns(T::Array[T::Hash[Integer, T.untyped]])
660
- end
661
- def cartesian_product(options_hash)
662
- options_hash.sort_by { |index, _| index }.reduce([{}]) do |acc, (index, values)|
663
- next acc if values.nil? || values.empty?
664
-
665
- acc.flat_map do |existing|
666
- values.map do |value|
667
- existing.merge(index => value)
668
- end
669
- end
670
- end
671
- end
672
-
673
- sig { params(instruction_map: T::Hash[Integer, String]).returns(T::Hash[Integer, String]) }
674
- def duplicate_instruction_map(instruction_map)
675
- instruction_map.each_with_object({}) do |(index, instruction), memo|
676
- memo[index] = instruction.is_a?(String) ? instruction.dup : instruction
677
- end
678
- end
679
-
680
- sig do
681
- params(demo_map: T::Hash[Integer, T::Array[DSPy::FewShotExample]]).returns(T::Hash[Integer, T::Array[DSPy::FewShotExample]])
682
- end
683
- def duplicate_demo_map(demo_map)
684
- demo_map.each_with_object({}) do |(index, demos), memo|
685
- next if demos.nil?
686
- memo[index] = demos.map { |demo| demo }
687
- end
688
- end
689
-
690
- sig { params(examples: T::Array[T.untyped]).returns(T::Array[DSPy::FewShotExample]) }
691
- def normalize_few_shot_examples(examples)
692
- examples.map do |example|
693
- if example.is_a?(DSPy::FewShotExample)
694
- example
695
- elsif example.is_a?(DSPy::Example)
696
- DSPy::FewShotExample.new(
697
- input: example.input_values,
698
- output: example.expected_values,
699
- reasoning: extract_reasoning_from_example(example)
700
- )
701
- else
702
- example
703
- end
704
- end
705
- end
706
-
707
- sig { params(predictor: T.untyped, examples: T::Array[DSPy::FewShotExample]).void }
708
- def assign_predictor_examples(predictor, examples)
709
- predictor.demos = examples if predictor.respond_to?(:demos=)
710
- return unless predictor.respond_to?(:prompt)
711
-
712
- cloned_examples = examples.map { |ex| ex }
713
- predictor.prompt.instance_variable_set(:@few_shot_examples, cloned_examples.freeze)
714
- end
715
-
716
- # Initialize optimization state for candidate selection
717
- sig { params(candidates: T::Array[EvaluatedCandidate]).returns(T::Hash[Symbol, T.untyped]) }
718
- def initialize_optimization_state(candidates)
719
- {
720
- candidates: candidates,
721
- scores: {},
722
- exploration_counts: Hash.new(0),
723
- temperature: config.init_temperature,
724
- best_score_history: [],
725
- diversity_scores: {},
726
- no_improvement_count: 0
727
- }
728
- end
729
-
730
- # Select next candidate based on optimization strategy
731
- sig do
732
- params(
733
- candidates: T::Array[EvaluatedCandidate],
734
- state: T::Hash[Symbol, T.untyped],
735
- trial_idx: Integer
736
- ).returns(EvaluatedCandidate)
737
- end
738
- def select_next_candidate(candidates, state, trial_idx)
739
- case config.optimization_strategy
740
- when OptimizationStrategy::Greedy
741
- select_candidate_greedy(candidates, state)
742
- when OptimizationStrategy::Adaptive
743
- select_candidate_adaptive(candidates, state, trial_idx)
744
- when OptimizationStrategy::Bayesian
745
- select_candidate_bayesian(candidates, state, trial_idx)
746
- else
747
- candidates.sample # Random fallback
748
- end
749
- end
750
-
751
- # Greedy candidate selection (exploit best known configurations)
752
- sig { params(candidates: T::Array[EvaluatedCandidate], state: T::Hash[Symbol, T.untyped]).returns(EvaluatedCandidate) }
753
- def select_candidate_greedy(candidates, state)
754
- # Prioritize unexplored candidates, then highest scoring
755
- unexplored = candidates.reject { |c| state[:scores].key?(c.config_id) }
756
- return unexplored.sample if unexplored.any?
757
-
758
- # Among explored, pick the best
759
- scored_candidates = candidates.select { |c| state[:scores].key?(c.config_id) }
760
- scored_candidates.max_by { |c| state[:scores][c.config_id] } || candidates.first
761
- end
762
-
763
- # Adaptive candidate selection (balance exploration and exploitation)
764
- sig do
765
- params(
766
- candidates: T::Array[EvaluatedCandidate],
767
- state: T::Hash[Symbol, T.untyped],
768
- trial_idx: Integer
769
- ).returns(EvaluatedCandidate)
770
- end
771
- def select_candidate_adaptive(candidates, state, trial_idx)
772
- # Update temperature based on progress
773
- progress = trial_idx.to_f / config.num_trials
774
- state[:temperature] = config.init_temperature * (1 - progress) + config.final_temperature * progress
775
-
776
- # Calculate selection scores combining exploitation and exploration
777
- candidate_scores = candidates.map do |candidate|
778
- exploitation_score = state[:scores][candidate.config_id] || 0.0
779
- exploration_bonus = 1.0 / (state[:exploration_counts][candidate.config_id] + 1)
780
-
781
- total_score = exploitation_score + state[:temperature] * exploration_bonus
782
- [candidate, total_score]
783
- end
784
-
785
- # Select using softmax with temperature
786
- if state[:temperature] > 0.01
787
- # Probabilistic selection
788
- weights = candidate_scores.map { |_, score| Math.exp(score / state[:temperature]) }
789
- total_weight = weights.sum
790
- probabilities = weights.map { |w| w / total_weight }
791
-
792
- random_value = rand
793
- cumulative = 0.0
794
- candidate_scores.each_with_index do |(candidate, _), idx|
795
- cumulative += probabilities[idx]
796
- return candidate if random_value <= cumulative
797
- end
798
- end
799
-
800
- # Fallback to highest scoring
801
- candidate_scores.max_by { |_, score| score }.first
802
- end
803
-
804
- # Bayesian candidate selection (use probabilistic model)
805
- sig do
806
- params(
807
- candidates: T::Array[EvaluatedCandidate],
808
- state: T::Hash[Symbol, T.untyped],
809
- trial_idx: Integer
810
- ).returns(EvaluatedCandidate)
811
- end
812
- def select_candidate_bayesian(candidates, state, trial_idx)
813
- # Need at least 3 observations to fit GP, otherwise fall back to adaptive
814
- return select_candidate_adaptive(candidates, state, trial_idx) if state[:scores].size < 3
815
-
816
- # Get scored candidates for training the GP
817
- scored_candidates = candidates.select { |c| state[:scores].key?(c.config_id) }
818
- return select_candidate_adaptive(candidates, state, trial_idx) if scored_candidates.size < 3
819
-
820
- begin
821
- # Encode candidates as numerical features
822
- all_candidate_features = encode_candidates_for_gp(candidates)
823
- scored_features = encode_candidates_for_gp(scored_candidates)
824
- scored_targets = scored_candidates.map { |c| state[:scores][c.config_id].to_f }
825
-
826
- # Train Gaussian Process
827
- gp = DSPy::Optimizers::GaussianProcess.new(
828
- length_scale: 1.0,
829
- signal_variance: 1.0,
830
- noise_variance: 0.01
831
- )
832
- gp.fit(scored_features, scored_targets)
833
-
834
- # Predict mean and uncertainty for all candidates
835
- means, stds = gp.predict(all_candidate_features, return_std: true)
836
-
837
- # Upper Confidence Bound (UCB) acquisition function
838
- kappa = 2.0 * Math.sqrt(Math.log(trial_idx + 1)) # Exploration parameter
839
- acquisition_scores = means.to_a.zip(stds.to_a).map { |m, s| m + kappa * s }
840
-
841
- # Select candidate with highest acquisition score
842
- best_idx = acquisition_scores.each_with_index.max_by { |score, _| score }[1]
843
- candidates[best_idx]
844
-
845
- rescue => e
846
- # If GP fails for any reason, fall back to adaptive selection
847
- DSPy.logger.warn("Bayesian optimization failed: #{e.message}. Falling back to adaptive selection.")
848
- select_candidate_adaptive(candidates, state, trial_idx)
849
- end
850
- end
851
-
852
- private
853
-
854
-
855
- # Encode candidates as numerical features for Gaussian Process
856
- sig { params(candidates: T::Array[EvaluatedCandidate]).returns(T::Array[T::Array[Float]]) }
857
- def encode_candidates_for_gp(candidates)
858
- # Simple encoding: use hash of config as features
859
- # In practice, this could be more sophisticated (e.g., instruction embeddings)
860
- candidates.map do |candidate|
861
- # Create deterministic numerical features from the candidate config
862
- config_hash = candidate.config_id.hash.abs
863
-
864
- # Extract multiple features to create a feature vector
865
- features = []
866
- features << (config_hash % 1000).to_f / 1000.0 # Feature 1: hash mod 1000, normalized
867
- features << ((config_hash / 1000) % 1000).to_f / 1000.0 # Feature 2: different part of hash
868
- features << ((config_hash / 1_000_000) % 1000).to_f / 1000.0 # Feature 3: high bits
869
-
870
- # Add instruction length if available (Python-compatible: no cap)
871
- instruction = candidate.instruction
872
- if instruction && !instruction.empty?
873
- features << instruction.length.to_f / 100.0 # Instruction length, uncapped
874
- else
875
- features << 0.5 # Default value
876
- end
877
-
878
- features
879
- end
880
- end
881
-
882
- # Evaluate a candidate configuration
883
- sig do
884
- params(
885
- program: T.untyped,
886
- candidate: EvaluatedCandidate,
887
- evaluation_set: T::Array[DSPy::Example]
888
- ).returns([Float, T.untyped, DSPy::Evaluate::BatchEvaluationResult])
889
- end
890
- def evaluate_candidate(program, candidate, evaluation_set)
891
- # Apply candidate configuration to program
892
- modified_program = apply_candidate_configuration(program, candidate)
893
-
894
- # Evaluate modified program
895
- evaluation_result = if use_concurrent_evaluation?(evaluation_set)
896
- evaluate_candidate_concurrently(modified_program, evaluation_set)
897
- else
898
- evaluate_program(modified_program, evaluation_set)
899
- end
900
-
901
- # Store evaluation details
902
- @evaluated_candidates << candidate
903
-
904
- [evaluation_result.pass_rate, modified_program, evaluation_result]
905
- end
906
-
907
- sig { params(evaluation_set: T::Array[DSPy::Example]).returns(T::Boolean) }
908
- def use_concurrent_evaluation?(evaluation_set)
909
- minibatch_size = config.minibatch_size
910
- return false unless minibatch_size&.positive?
911
- return false unless config.num_threads && config.num_threads > 1
912
-
913
- evaluation_set.size > minibatch_size
914
- end
915
-
916
- sig do
917
- params(
918
- modified_program: T.untyped,
919
- evaluation_set: T::Array[DSPy::Example]
920
- ).returns(DSPy::Evaluate::BatchEvaluationResult)
921
- end
922
- def evaluate_candidate_concurrently(modified_program, evaluation_set)
923
- chunk_size = T.must(config.minibatch_size)
924
- chunks = evaluation_set.each_slice(chunk_size).map(&:dup)
925
- return evaluate_program(modified_program, evaluation_set) if chunks.size <= 1
926
-
927
- pool_size = [config.num_threads, chunks.size].min
928
- pool_size = 1 if pool_size <= 0
929
- executor = Concurrent::FixedThreadPool.new(pool_size)
930
-
931
- futures = chunks.map do |chunk|
932
- Concurrent::Promises.future_on(executor) do
933
- evaluate_program(modified_program, chunk)
934
- end
935
- end
936
-
937
- results = futures.map(&:value!)
938
- combine_batch_results(results)
939
- ensure
940
- if executor
941
- executor.shutdown
942
- executor.wait_for_termination
943
- end
944
- end
945
-
946
- sig do
947
- params(batch_results: T::Array[DSPy::Evaluate::BatchEvaluationResult]).returns(DSPy::Evaluate::BatchEvaluationResult)
948
- end
949
- def combine_batch_results(batch_results)
950
- return DSPy::Evaluate::BatchEvaluationResult.new(results: [], aggregated_metrics: {}) if batch_results.empty?
951
-
952
- combined_results = batch_results.flat_map(&:results)
953
- total_examples = batch_results.sum(&:total_examples)
954
- aggregated_metrics = merge_aggregated_metrics(batch_results, total_examples)
955
-
956
- DSPy::Evaluate::BatchEvaluationResult.new(
957
- results: combined_results,
958
- aggregated_metrics: aggregated_metrics
959
- )
960
- end
961
-
962
- sig do
963
- params(
964
- batch_results: T::Array[DSPy::Evaluate::BatchEvaluationResult],
965
- total_examples: Integer
966
- ).returns(T::Hash[Symbol, T.untyped])
967
- end
968
- def merge_aggregated_metrics(batch_results, total_examples)
969
- return {} if total_examples.zero?
970
-
971
- keys = batch_results.flat_map { |res| res.aggregated_metrics.keys }.uniq
972
- keys.each_with_object({}) do |key, memo|
973
- numeric_weight = 0.0
974
- numeric_sum = 0.0
975
- fallback_value = nil
976
-
977
- batch_results.each do |res|
978
- value = res.aggregated_metrics[key]
979
- next if value.nil?
980
-
981
- if value.is_a?(Numeric)
982
- numeric_sum += value.to_f * res.total_examples
983
- numeric_weight += res.total_examples
984
- else
985
- fallback_value = value
986
- end
987
- end
988
-
989
- if numeric_weight.positive?
990
- memo[key] = numeric_sum / numeric_weight
991
- elsif fallback_value
992
- memo[key] = fallback_value
993
- end
994
- end
995
- end
996
-
997
- # Apply candidate configuration to program
998
- sig { params(program: T.untyped, candidate: EvaluatedCandidate).returns(T.untyped) }
999
- def apply_candidate_configuration(program, candidate)
1000
- instructions_map = candidate.metadata[:instructions_map] || {}
1001
- demos_map = candidate.metadata[:demos_map] || {}
1002
-
1003
- modified_program = program
1004
- if modified_program.respond_to?(:predictors) && (instructions_map.any? || demos_map.any?)
1005
- modified_program = modified_program.clone
1006
- modified_program.predictors.each_with_index do |predictor, idx|
1007
- if instructions_map.key?(idx)
1008
- signature = Utils.get_signature(predictor)
1009
- updated_signature = signature.with_instructions(instructions_map[idx])
1010
- Utils.set_signature(predictor, updated_signature)
1011
- end
1012
-
1013
- if demos_map.key?(idx)
1014
- normalized_examples = normalize_few_shot_examples(demos_map[idx])
1015
- assign_predictor_examples(predictor, normalized_examples)
1016
- end
1017
- end
1018
- end
1019
-
1020
- # Apply instruction if provided (top-level programs still respect with_instruction)
1021
- if !candidate.instruction.empty? && modified_program.respond_to?(:with_instruction)
1022
- modified_program = modified_program.with_instruction(candidate.instruction)
1023
- end
1024
-
1025
- should_apply_global_examples = candidate.few_shot_examples.any? &&
1026
- modified_program.respond_to?(:with_examples) &&
1027
- (demos_map.empty? || !modified_program.respond_to?(:predictors))
1028
-
1029
- if should_apply_global_examples
1030
- normalized_few_shot = normalize_few_shot_examples(candidate.few_shot_examples)
1031
- modified_program = modified_program.with_examples(normalized_few_shot)
1032
- end
1033
-
1034
- modified_program
1035
- end
1036
-
1037
- # Update optimization state after candidate evaluation
1038
- sig do
1039
- params(
1040
- state: T::Hash[Symbol, T.untyped],
1041
- candidate: EvaluatedCandidate,
1042
- score: Float
1043
- ).void
1044
- end
1045
- def update_optimization_state(state, candidate, score)
1046
- state[:scores][candidate.config_id] = score
1047
- state[:exploration_counts][candidate.config_id] += 1
1048
- state[:best_score_history] << score
1049
-
1050
- # Track diversity if enabled
1051
- if config.track_diversity
1052
- state[:diversity_scores][candidate.config_id] = calculate_diversity_score(candidate)
1053
- end
1054
-
1055
- # Update no improvement counter
1056
- if state[:best_score_history].size > 1 && score > state[:best_score_history][-2]
1057
- state[:no_improvement_count] = 0
1058
- else
1059
- state[:no_improvement_count] += 1
1060
- end
1061
- end
1062
-
1063
- # Check if optimization should stop early
1064
- sig { params(state: T::Hash[Symbol, T.untyped], trial_idx: Integer).returns(T::Boolean) }
1065
- def should_early_stop?(state, trial_idx)
1066
- # Don't stop too early
1067
- return false if trial_idx < config.early_stopping_patience
1068
-
1069
- # Stop if no improvement for patience trials
1070
- state[:no_improvement_count] >= config.early_stopping_patience
1071
- end
1072
-
1073
- # Calculate diversity score for candidate (Python-compatible: only few-shot count)
1074
- sig { params(candidate: EvaluatedCandidate).returns(Float) }
1075
- def calculate_diversity_score(candidate)
1076
- # Python DSPy doesn't use instruction length for diversity, only few-shot count
1077
- few_shot_diversity = candidate.few_shot_examples.size / 10.0
1078
-
1079
- [few_shot_diversity, 1.0].min
1080
- end
1081
-
1082
- # Build final MIPROv2 result
1083
- sig do
1084
- params(
1085
- optimization_result: T::Hash[Symbol, T.untyped],
1086
- demo_candidates: T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]],
1087
- proposal_result: DSPy::Propose::GroundedProposer::ProposalResult
1088
- ).returns(MIPROv2Result)
1089
- end
1090
- def build_miprov2_result(optimization_result, demo_candidates, proposal_result)
1091
- best_candidate = optimization_result[:best_candidate]
1092
- best_program = optimization_result[:best_program]
1093
- best_score = optimization_result[:best_score]
1094
- best_evaluation_result = optimization_result[:best_evaluation_result]
1095
-
1096
- scores = { pass_rate: best_score }
1097
-
1098
- history = {
1099
- total_trials: optimization_result[:trials_completed],
1100
- optimization_strategy: optimization_strategy_name,
1101
- early_stopped: optimization_result[:trials_completed] < config.num_trials,
1102
- score_history: optimization_result[:optimization_state][:best_score_history],
1103
- total_eval_calls: optimization_result[:total_eval_calls]
1104
- }
1105
-
1106
- metadata = {
1107
- optimizer: "MIPROv2",
1108
- auto_mode: infer_auto_mode,
1109
- optimization_strategy: optimization_strategy_name,
1110
- best_instruction: best_candidate&.instruction || "",
1111
- best_few_shot_count: best_candidate&.few_shot_examples&.size || 0,
1112
- best_candidate_type: best_candidate&.type&.serialize || "unknown",
1113
- optimization_timestamp: Time.now.iso8601
1114
- }
1115
-
1116
- # Create bootstrap statistics from demo_candidates
1117
- num_predictors = demo_candidates.keys.size
1118
- sets_per_predictor = demo_candidates.values.map(&:size)
1119
- all_demo_sets = demo_candidates.values.flat_map { |sets| sets }
1120
- bootstrap_statistics = {
1121
- num_predictors: num_predictors,
1122
- demo_sets_per_predictor: sets_per_predictor.max || 0,
1123
- avg_demos_per_set: all_demo_sets.empty? ? 0 : all_demo_sets.map(&:size).sum.to_f / all_demo_sets.size
1124
- }
1125
- bootstrap_statistics[:per_predictor_demo_counts] = sets_per_predictor if sets_per_predictor.any?
1126
-
1127
- optimization_trace = serialize_optimization_trace(optimization_result[:optimization_state])
1128
- optimization_trace[:trial_logs] = serialize_trial_logs(optimization_result[:trial_logs])
1129
- optimization_trace[:param_score_dict] = serialize_param_score_dict(optimization_result[:param_score_dict])
1130
- optimization_trace[:fully_evaled_param_combos] = serialize_fully_evaled_param_combos(optimization_result[:fully_evaled_param_combos])
1131
- optimization_trace[:total_eval_calls] = optimization_result[:total_eval_calls]
1132
-
1133
- MIPROv2Result.new(
1134
- optimized_program: best_program,
1135
- scores: scores,
1136
- history: history,
1137
- best_score_name: "pass_rate",
1138
- best_score_value: best_score,
1139
- metadata: metadata,
1140
- evaluated_candidates: @evaluated_candidates,
1141
- optimization_trace: optimization_trace,
1142
- bootstrap_statistics: bootstrap_statistics,
1143
- proposal_statistics: proposal_result.analysis,
1144
- best_evaluation_result: best_evaluation_result
1145
- )
1146
- end
1147
-
1148
- # Serialize optimization trace for better JSON output
1149
- sig { params(optimization_state: T.nilable(T::Hash[Symbol, T.untyped])).returns(T::Hash[Symbol, T.untyped]) }
1150
- def serialize_optimization_trace(optimization_state)
1151
- return {} unless optimization_state
1152
-
1153
- serialized_trace = optimization_state.dup
1154
-
1155
- # Convert candidate objects to their hash representations
1156
- if serialized_trace[:candidates]
1157
- serialized_trace[:candidates] = serialized_trace[:candidates].map(&:to_h)
1158
- end
1159
-
1160
- serialized_trace
1161
- end
1162
-
1163
- sig do
1164
- params(
1165
- trial_number: Integer,
1166
- candidate: EvaluatedCandidate,
1167
- evaluation_type: Symbol,
1168
- batch_size: Integer
1169
- ).returns(T::Hash[Symbol, T.untyped])
1170
- end
1171
- def create_trial_log_entry(trial_number:, candidate:, evaluation_type:, batch_size:)
1172
- # Preserve interface parity with Python implementation (trial number stored implicitly via hash key)
1173
- trial_number # no-op to acknowledge parameter usage
1174
- instructions_map = candidate.metadata[:instructions_map] || {}
1175
- demos_map = candidate.metadata[:demos_map] || {}
1176
- entry = {
1177
- candidate_id: candidate.config_id,
1178
- candidate_type: candidate.type.serialize,
1179
- instruction_preview: candidate.instruction.to_s[0, 160],
1180
- few_shot_count: candidate.few_shot_examples.size,
1181
- metadata: deep_dup(candidate.metadata),
1182
- evaluation_type: evaluation_type,
1183
- batch_size: batch_size,
1184
- status: :in_progress,
1185
- started_at: Time.now.iso8601
1186
- }
1187
- if instructions_map.any?
1188
- entry[:instructions] = duplicate_instruction_map(instructions_map)
1189
- entry[:instruction] = entry[:instructions][0] if entry[:instructions].key?(0)
1190
- elsif candidate.instruction && !candidate.instruction.empty?
1191
- predictor_index = candidate.metadata[:predictor_index] || 0
1192
- entry[:instruction] = candidate.instruction
1193
- entry[:instructions] = { predictor_index => candidate.instruction }
1194
- end
1195
- entry[:few_shot_map] = duplicate_demo_map(demos_map) if demos_map.any?
1196
- entry
1197
- end
1198
-
1199
- sig do
1200
- params(
1201
- trial_logs: T::Hash[Integer, T::Hash[Symbol, T.untyped]],
1202
- trial_number: Integer,
1203
- score: T.nilable(Float),
1204
- evaluation_type: Symbol,
1205
- batch_size: Integer,
1206
- total_eval_calls: Integer,
1207
- error: T.nilable(String)
1208
- ).void
1209
- end
1210
- def finalize_trial_log_entry(trial_logs, trial_number, score:, evaluation_type:, batch_size:, total_eval_calls:, error: nil)
1211
- entry = trial_logs[trial_number] || {}
1212
- entry[:score] = score if score
1213
- entry[:evaluation_type] = evaluation_type
1214
- entry[:batch_size] = batch_size
1215
- entry[:total_eval_calls] = total_eval_calls
1216
- entry[:status] = error ? :error : :completed
1217
- entry[:error] = error if error
1218
- entry[:completed_at] = Time.now.iso8601
1219
- trial_logs[trial_number] = entry
1220
- end
1221
-
1222
- sig do
1223
- params(
1224
- param_score_dict: T::Hash[String, T::Array[T::Hash[Symbol, T.untyped]]],
1225
- candidate: EvaluatedCandidate,
1226
- score: Float,
1227
- evaluation_type: Symbol,
1228
- instructions: T.nilable(T::Hash[Integer, String])
1229
- ).void
1230
- end
1231
- def record_param_score(param_score_dict, candidate, score, evaluation_type:, instructions: nil)
1232
- instructions_hash = instructions || {}
1233
- if instructions_hash.empty? && candidate.instruction && !candidate.instruction.empty?
1234
- predictor_index = candidate.metadata[:predictor_index] || 0
1235
- instructions_hash[predictor_index] = candidate.instruction
1236
- end
1237
-
1238
- record = {
1239
- candidate_id: candidate.config_id,
1240
- candidate_type: candidate.type.serialize,
1241
- score: score,
1242
- evaluation_type: evaluation_type,
1243
- timestamp: Time.now.iso8601,
1244
- metadata: deep_dup(candidate.metadata)
1245
- }
1246
- primary_instruction = instructions_hash[0] || candidate.instruction
1247
- record[:instruction] = primary_instruction if primary_instruction && !primary_instruction.empty?
1248
- record[:instructions] = instructions_hash unless instructions_hash.empty?
1249
-
1250
- param_score_dict[candidate.config_id] << record
1251
- end
1252
-
1253
- sig do
1254
- params(
1255
- fully_evaled_param_combos: T::Hash[String, T::Hash[Symbol, T.untyped]],
1256
- candidate: EvaluatedCandidate,
1257
- score: Float,
1258
- instructions: T.nilable(T::Hash[Integer, String])
1259
- ).void
1260
- end
1261
- def update_fully_evaled_param_combos(fully_evaled_param_combos, candidate, score, instructions: nil)
1262
- existing = fully_evaled_param_combos[candidate.config_id]
1263
- if existing.nil? || score > existing[:score]
1264
- instructions_hash = instructions || {}
1265
- if instructions_hash.empty? && candidate.instruction && !candidate.instruction.empty?
1266
- predictor_index = candidate.metadata[:predictor_index] || 0
1267
- instructions_hash[predictor_index] = candidate.instruction
1268
- end
1269
-
1270
- fully_evaled_param_combos[candidate.config_id] = {
1271
- candidate_id: candidate.config_id,
1272
- candidate_type: candidate.type.serialize,
1273
- score: score,
1274
- metadata: deep_dup(candidate.metadata),
1275
- updated_at: Time.now.iso8601
1276
- }
1277
- unless instructions_hash.empty?
1278
- fully_evaled_param_combos[candidate.config_id][:instructions] = instructions_hash
1279
- fully_evaled_param_combos[candidate.config_id][:instruction] = instructions_hash[0] || candidate.instruction
1280
- end
1281
- end
1282
- end
1283
-
1284
- sig { params(trial_logs: T.nilable(T::Hash[Integer, T::Hash[Symbol, T.untyped]])).returns(T::Hash[Integer, T::Hash[Symbol, T.untyped]]) }
1285
- def serialize_trial_logs(trial_logs)
1286
- return {} unless trial_logs
1287
-
1288
- allowed_keys = [
1289
- :candidate_id,
1290
- :candidate_type,
1291
- :instruction_preview,
1292
- :instruction,
1293
- :instructions,
1294
- :few_shot_count,
1295
- :metadata,
1296
- :evaluation_type,
1297
- :batch_size,
1298
- :score,
1299
- :status,
1300
- :error,
1301
- :started_at,
1302
- :completed_at,
1303
- :total_eval_calls
1304
- ]
1305
-
1306
- trial_logs.transform_values do |entry|
1307
- entry.each_with_object({}) do |(key, value), memo|
1308
- memo[key] = value if allowed_keys.include?(key)
1309
- end
1310
- end
1311
- end
1312
-
1313
- sig { params(param_score_dict: T.nilable(T::Hash[String, T::Array[T::Hash[Symbol, T.untyped]]])).returns(T::Hash[String, T::Array[T::Hash[Symbol, T.untyped]]]) }
1314
- def serialize_param_score_dict(param_score_dict)
1315
- return {} unless param_score_dict
1316
-
1317
- allowed_keys = [:candidate_id, :candidate_type, :score, :evaluation_type, :timestamp, :metadata, :instruction, :instructions]
1318
-
1319
- param_score_dict.transform_values do |records|
1320
- records.map do |record|
1321
- record.each_with_object({}) do |(key, value), memo|
1322
- memo[key] = value if allowed_keys.include?(key)
1323
- end
1324
- end
1325
- end
1326
- end
1327
-
1328
- sig { params(fully_evaled_param_combos: T.nilable(T::Hash[String, T::Hash[Symbol, T.untyped]])).returns(T::Hash[String, T::Hash[Symbol, T.untyped]]) }
1329
- def serialize_fully_evaled_param_combos(fully_evaled_param_combos)
1330
- return {} unless fully_evaled_param_combos
1331
-
1332
- allowed_keys = [:candidate_id, :candidate_type, :score, :metadata, :updated_at, :instruction, :instructions]
1333
-
1334
- fully_evaled_param_combos.transform_values do |record|
1335
- record.each_with_object({}) do |(key, value), memo|
1336
- memo[key] = value if allowed_keys.include?(key)
1337
- end
1338
- end
1339
- end
1340
-
1341
- sig { params(value: T.untyped).returns(T.untyped) }
1342
- def deep_dup(value)
1343
- case value
1344
- when Hash
1345
- value.each_with_object({}) { |(k, v), memo| memo[k] = deep_dup(v) }
1346
- when Array
1347
- value.map { |element| deep_dup(element) }
1348
- else
1349
- value
1350
- end
1351
- end
1352
-
1353
- # Helper methods
1354
- sig { returns(String) }
1355
- def optimization_strategy_name
1356
- strategy = config.optimization_strategy
1357
- return strategy.serialize if strategy.respond_to?(:serialize)
1358
-
1359
- strategy.to_s
1360
- end
1361
-
1362
- sig { params(program: T.untyped).returns(T.nilable(String)) }
1363
- def extract_current_instruction(program)
1364
- if program.respond_to?(:prompt) && program.prompt.respond_to?(:instruction)
1365
- program.prompt.instruction
1366
- elsif program.respond_to?(:system_signature)
1367
- system_sig = program.system_signature
1368
- system_sig.is_a?(String) ? system_sig : nil
1369
- else
1370
- nil
1371
- end
1372
- end
1373
-
1374
- sig { params(program: T.untyped).returns(T::Hash[Integer, String]) }
1375
- def extract_program_instructions(program)
1376
- instructions = {}
1377
- if program.respond_to?(:predictors)
1378
- program.predictors.each_with_index do |predictor, index|
1379
- if predictor.respond_to?(:prompt) && predictor.prompt.respond_to?(:instruction)
1380
- value = predictor.prompt.instruction
1381
- instructions[index] = value if value
1382
- end
1383
- end
1384
- else
1385
- fallback_instruction = extract_current_instruction(program)
1386
- instructions[0] = fallback_instruction if fallback_instruction
1387
- end
1388
- instructions
1389
- end
1390
-
1391
- sig { params(program: T.untyped).returns(T.nilable(T.class_of(DSPy::Signature))) }
1392
- def extract_signature_class(program)
1393
- program.respond_to?(:signature_class) ? program.signature_class : nil
1394
- end
1395
-
1396
- sig { params(example: T.untyped).returns(T.nilable(String)) }
1397
- def extract_reasoning_from_example(example)
1398
- case example
1399
- when DSPy::Example
1400
- if example.expected_values.key?(:reasoning)
1401
- example.expected_values[:reasoning]
1402
- elsif example.expected_values.key?(:explanation)
1403
- example.expected_values[:explanation]
1404
- else
1405
- nil
1406
- end
1407
- else
1408
- nil
1409
- end
1410
- end
1411
-
1412
- # Infer auto mode based on configuration
1413
- sig { returns(String) }
1414
- def infer_auto_mode
1415
- case config.num_trials
1416
- when 0..6 then "light"
1417
- when 7..12 then "medium"
1418
- else "heavy"
1419
- end
1420
- end
1421
- end
1422
- end
1423
- end