dspy 0.28.1 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -3
- data/lib/dspy/callbacks.rb +222 -0
- data/lib/dspy/chain_of_thought.rb +2 -1
- data/lib/dspy/code_act.rb +14 -1
- data/lib/dspy/datasets/ade.rb +90 -0
- data/lib/dspy/datasets.rb +8 -0
- data/lib/dspy/lm.rb +9 -12
- data/lib/dspy/mixins/struct_builder.rb +17 -25
- data/lib/dspy/module.rb +45 -1
- data/lib/dspy/observability/async_span_processor.rb +67 -93
- data/lib/dspy/observability.rb +43 -1
- data/lib/dspy/predict.rb +17 -0
- data/lib/dspy/prompt.rb +90 -20
- data/lib/dspy/propose/dataset_summary_generator.rb +210 -0
- data/lib/dspy/propose/grounded_proposer.rb +320 -66
- data/lib/dspy/re_act.rb +13 -0
- data/lib/dspy/reflection_lm.rb +36 -0
- data/lib/dspy/teleprompt/bootstrap_strategy.rb +26 -0
- data/lib/dspy/teleprompt/gepa.rb +448 -2803
- data/lib/dspy/teleprompt/mipro_v2.rb +624 -100
- data/lib/dspy/teleprompt/utils.rb +349 -42
- data/lib/dspy/version.rb +2 -2
- data/lib/dspy.rb +4 -2
- data/lib/gepa/api.rb +61 -0
- data/lib/gepa/core/engine.rb +226 -0
- data/lib/gepa/core/evaluation_batch.rb +26 -0
- data/lib/gepa/core/result.rb +92 -0
- data/lib/gepa/core/state.rb +231 -0
- data/lib/gepa/logging/experiment_tracker.rb +54 -0
- data/lib/gepa/logging/logger.rb +57 -0
- data/lib/gepa/logging.rb +9 -0
- data/lib/gepa/proposer/base.rb +27 -0
- data/lib/gepa/proposer/merge_proposer.rb +424 -0
- data/lib/gepa/proposer/reflective_mutation/base.rb +48 -0
- data/lib/gepa/proposer/reflective_mutation/reflective_mutation.rb +188 -0
- data/lib/gepa/strategies/batch_sampler.rb +91 -0
- data/lib/gepa/strategies/candidate_selector.rb +97 -0
- data/lib/gepa/strategies/component_selector.rb +57 -0
- data/lib/gepa/strategies/instruction_proposal.rb +120 -0
- data/lib/gepa/telemetry.rb +122 -0
- data/lib/gepa/utils/pareto.rb +119 -0
- data/lib/gepa.rb +21 -0
- metadata +59 -4
- data/lib/dspy/teleprompt/simple_optimizer.rb +0 -497
@@ -1,7 +1,10 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'digest'
|
4
|
+
require 'time'
|
5
|
+
require 'concurrent-ruby'
|
4
6
|
require 'sorbet-runtime'
|
7
|
+
require 'securerandom'
|
5
8
|
require_relative 'teleprompter'
|
6
9
|
require_relative 'utils'
|
7
10
|
require_relative '../propose/grounded_proposer'
|
@@ -124,6 +127,7 @@ module DSPy
|
|
124
127
|
setting :track_diversity, default: true
|
125
128
|
setting :max_errors, default: 3
|
126
129
|
setting :num_threads, default: 1
|
130
|
+
setting :minibatch_size, default: nil
|
127
131
|
|
128
132
|
# Class-level configuration method - sets defaults for new instances
|
129
133
|
def self.configure(&block)
|
@@ -265,6 +269,7 @@ module DSPy
|
|
265
269
|
@proposer = DSPy::Propose::GroundedProposer.new(config: DSPy::Propose::GroundedProposer::Config.new)
|
266
270
|
@optimization_trace = []
|
267
271
|
@evaluated_candidates = []
|
272
|
+
@trial_history = {}
|
268
273
|
end
|
269
274
|
|
270
275
|
# Main MIPROv2 optimization method
|
@@ -282,7 +287,7 @@ module DSPy
|
|
282
287
|
trainset_size: trainset.size,
|
283
288
|
valset_size: valset&.size || 0,
|
284
289
|
num_trials: config.num_trials,
|
285
|
-
optimization_strategy:
|
290
|
+
optimization_strategy: optimization_strategy_name,
|
286
291
|
mode: infer_auto_mode
|
287
292
|
}) do
|
288
293
|
# Convert examples to typed format
|
@@ -294,18 +299,18 @@ module DSPy
|
|
294
299
|
|
295
300
|
# Phase 1: Bootstrap few-shot examples
|
296
301
|
emit_event('phase_start', { phase: 1, name: 'bootstrap' })
|
297
|
-
|
298
|
-
emit_event('phase_complete', {
|
299
|
-
phase: 1,
|
300
|
-
|
301
|
-
|
302
|
+
demo_candidates = phase_1_bootstrap(program, typed_trainset)
|
303
|
+
emit_event('phase_complete', {
|
304
|
+
phase: 1,
|
305
|
+
num_predictors: demo_candidates.keys.size,
|
306
|
+
demo_sets_per_predictor: demo_candidates[0]&.size || 0
|
302
307
|
})
|
303
308
|
|
304
309
|
# Phase 2: Generate instruction candidates
|
305
310
|
emit_event('phase_start', { phase: 2, name: 'instruction_proposal' })
|
306
|
-
proposal_result = phase_2_propose_instructions(program, typed_trainset,
|
307
|
-
emit_event('phase_complete', {
|
308
|
-
phase: 2,
|
311
|
+
proposal_result = phase_2_propose_instructions(program, typed_trainset, demo_candidates)
|
312
|
+
emit_event('phase_complete', {
|
313
|
+
phase: 2,
|
309
314
|
num_candidates: proposal_result.num_candidates,
|
310
315
|
best_instruction_preview: proposal_result.best_instruction[0, 50]
|
311
316
|
})
|
@@ -316,7 +321,7 @@ module DSPy
|
|
316
321
|
program,
|
317
322
|
evaluation_set,
|
318
323
|
proposal_result,
|
319
|
-
|
324
|
+
demo_candidates
|
320
325
|
)
|
321
326
|
emit_event('phase_complete', {
|
322
327
|
phase: 3,
|
@@ -327,10 +332,12 @@ module DSPy
|
|
327
332
|
# Build final result
|
328
333
|
final_result = build_miprov2_result(
|
329
334
|
optimization_result,
|
330
|
-
|
335
|
+
demo_candidates,
|
331
336
|
proposal_result
|
332
337
|
)
|
333
338
|
|
339
|
+
@trial_history = optimization_result[:trial_logs] || {}
|
340
|
+
|
334
341
|
save_results(final_result)
|
335
342
|
final_result
|
336
343
|
end
|
@@ -339,16 +346,17 @@ module DSPy
|
|
339
346
|
private
|
340
347
|
|
341
348
|
# Phase 1: Bootstrap few-shot examples from training data
|
342
|
-
|
349
|
+
# Returns a hash mapping predictor indices to arrays of demo sets
|
350
|
+
sig { params(program: T.untyped, trainset: T::Array[DSPy::Example]).returns(T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]]) }
|
343
351
|
# Phase 1: delegates to Utils.create_n_fewshot_demo_sets to build the
# few-shot demo sets for +program+ from +trainset+.
#
# The number of candidate sets and the per-set demo limits are read from
# +config+; the metric passed along is the one stored in @metric.
# Returns whatever Utils.create_n_fewshot_demo_sets returns.
def phase_1_bootstrap(program, trainset)
  demo_limits = {
    max_bootstrapped_demos: config.max_bootstrapped_examples,
    max_labeled_demos: config.max_labeled_examples
  }

  # Second positional argument is the number of candidate demo sets.
  Utils.create_n_fewshot_demo_sets(program, config.bootstrap_sets, trainset, **demo_limits, metric: @metric)
end
|
353
361
|
|
354
362
|
# Phase 2: Generate instruction candidates using grounded proposer
|
@@ -356,28 +364,34 @@ module DSPy
|
|
356
364
|
params(
|
357
365
|
program: T.untyped,
|
358
366
|
trainset: T::Array[DSPy::Example],
|
359
|
-
|
367
|
+
demo_candidates: T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]]
|
360
368
|
).returns(DSPy::Propose::GroundedProposer::ProposalResult)
|
361
369
|
end
|
362
|
-
def phase_2_propose_instructions(program, trainset,
|
370
|
+
# Phase 2: asks a freshly configured grounded proposer for instruction
# candidates.
#
# program         - the program being optimized
# trainset        - training examples handed to the proposer
# demo_candidates - per-predictor demo sets produced by phase 1
#
# Side effect: replaces @proposer with a new GroundedProposer built from the
# program and trainset. Returns the result of
# GroundedProposer#propose_instructions_for_program.
def phase_2_propose_instructions(program, trainset, demo_candidates)
  # NOTE(review): the value returned here is not consumed by anything below.
  # The call is kept only because the helper's body is not visible from this
  # method, so it cannot be confirmed to be side-effect free.
  extract_current_instruction(program)

  # The proposer is rebuilt with the program and trainset; per the original
  # comment this is what lets its program/dataset awareness features work.
  proposer_config = DSPy::Propose::GroundedProposer::Config.new
  proposer_config.num_instruction_candidates = config.num_instruction_candidates

  @proposer = DSPy::Propose::GroundedProposer.new(
    config: proposer_config,
    program: program,
    trainset: trainset
  )

  # The full demo_candidates hash is forwarded, so no flattened sample of
  # few-shot examples needs to be computed here.
  @proposer.propose_instructions_for_program(
    trainset: trainset,
    program: program,
    demo_candidates: demo_candidates,
    trial_logs: @trial_history,
    num_instruction_candidates: config.num_instruction_candidates
  )
end
|
383
397
|
|
@@ -387,21 +401,27 @@ module DSPy
|
|
387
401
|
program: T.untyped,
|
388
402
|
evaluation_set: T::Array[DSPy::Example],
|
389
403
|
proposal_result: DSPy::Propose::GroundedProposer::ProposalResult,
|
390
|
-
|
404
|
+
demo_candidates: T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]]
|
391
405
|
).returns(T::Hash[Symbol, T.untyped])
|
392
406
|
end
|
393
|
-
def phase_3_optimize(program, evaluation_set, proposal_result,
|
407
|
+
def phase_3_optimize(program, evaluation_set, proposal_result, demo_candidates)
|
394
408
|
# Generate candidate configurations
|
395
|
-
candidates = generate_candidate_configurations(proposal_result,
|
409
|
+
candidates = generate_candidate_configurations(proposal_result, demo_candidates)
|
396
410
|
|
397
411
|
# Initialize optimization state
|
398
412
|
optimization_state = initialize_optimization_state(candidates)
|
399
|
-
|
413
|
+
|
414
|
+
# Initialize trial tracking structures
|
415
|
+
trial_logs = {}
|
416
|
+
param_score_dict = Hash.new { |hash, key| hash[key] = [] }
|
417
|
+
fully_evaled_param_combos = {}
|
418
|
+
total_eval_calls = 0
|
419
|
+
|
400
420
|
# Run optimization trials
|
401
421
|
trials_completed = 0
|
402
422
|
best_score = 0.0
|
403
423
|
best_candidate = nil
|
404
|
-
best_program =
|
424
|
+
best_program = program
|
405
425
|
best_evaluation_result = nil
|
406
426
|
|
407
427
|
config.num_trials.times do |trial_idx|
|
@@ -409,6 +429,14 @@ module DSPy
|
|
409
429
|
|
410
430
|
# Select next candidate based on optimization strategy
|
411
431
|
candidate = select_next_candidate(candidates, optimization_state, trial_idx)
|
432
|
+
batch_size = evaluation_set.size
|
433
|
+
|
434
|
+
trial_logs[trials_completed] = create_trial_log_entry(
|
435
|
+
trial_number: trials_completed,
|
436
|
+
candidate: candidate,
|
437
|
+
evaluation_type: :full,
|
438
|
+
batch_size: batch_size
|
439
|
+
)
|
412
440
|
|
413
441
|
emit_event('trial_start', {
|
414
442
|
trial_number: trials_completed,
|
@@ -420,12 +448,30 @@ module DSPy
|
|
420
448
|
begin
|
421
449
|
# Evaluate candidate
|
422
450
|
score, modified_program, evaluation_result = evaluate_candidate(program, candidate, evaluation_set)
|
451
|
+
total_eval_calls += batch_size
|
452
|
+
|
453
|
+
instructions_snapshot = extract_program_instructions(modified_program)
|
454
|
+
trial_logs[trials_completed][:instructions] = instructions_snapshot unless instructions_snapshot.empty?
|
455
|
+
trial_logs[trials_completed][:instruction] = instructions_snapshot[0] if instructions_snapshot.key?(0)
|
423
456
|
|
424
457
|
# Update optimization state
|
425
458
|
update_optimization_state(optimization_state, candidate, score)
|
459
|
+
record_param_score(
|
460
|
+
param_score_dict,
|
461
|
+
candidate,
|
462
|
+
score,
|
463
|
+
evaluation_type: :full,
|
464
|
+
instructions: instructions_snapshot
|
465
|
+
)
|
466
|
+
update_fully_evaled_param_combos(
|
467
|
+
fully_evaled_param_combos,
|
468
|
+
candidate,
|
469
|
+
score,
|
470
|
+
instructions: instructions_snapshot
|
471
|
+
)
|
426
472
|
|
427
473
|
# Track best result
|
428
|
-
is_best = score > best_score
|
474
|
+
is_best = best_candidate.nil? || score > best_score
|
429
475
|
if is_best
|
430
476
|
best_score = score
|
431
477
|
best_candidate = candidate
|
@@ -433,6 +479,15 @@ module DSPy
|
|
433
479
|
best_evaluation_result = evaluation_result
|
434
480
|
end
|
435
481
|
|
482
|
+
finalize_trial_log_entry(
|
483
|
+
trial_logs,
|
484
|
+
trials_completed,
|
485
|
+
score: score,
|
486
|
+
evaluation_type: :full,
|
487
|
+
batch_size: batch_size,
|
488
|
+
total_eval_calls: total_eval_calls
|
489
|
+
)
|
490
|
+
|
436
491
|
emit_event('trial_complete', {
|
437
492
|
trial_number: trials_completed,
|
438
493
|
score: score,
|
@@ -447,6 +502,16 @@ module DSPy
|
|
447
502
|
end
|
448
503
|
|
449
504
|
rescue => error
|
505
|
+
finalize_trial_log_entry(
|
506
|
+
trial_logs,
|
507
|
+
trials_completed,
|
508
|
+
score: nil,
|
509
|
+
evaluation_type: :full,
|
510
|
+
batch_size: batch_size,
|
511
|
+
total_eval_calls: total_eval_calls,
|
512
|
+
error: error.message
|
513
|
+
)
|
514
|
+
|
450
515
|
emit_event('trial_error', {
|
451
516
|
trial_number: trials_completed,
|
452
517
|
error: error.message,
|
@@ -464,73 +529,190 @@ module DSPy
|
|
464
529
|
best_evaluation_result: best_evaluation_result,
|
465
530
|
trials_completed: trials_completed,
|
466
531
|
optimization_state: optimization_state,
|
467
|
-
evaluated_candidates: @evaluated_candidates
|
532
|
+
evaluated_candidates: @evaluated_candidates,
|
533
|
+
trial_logs: trial_logs,
|
534
|
+
param_score_dict: param_score_dict,
|
535
|
+
fully_evaled_param_combos: fully_evaled_param_combos,
|
536
|
+
total_eval_calls: total_eval_calls
|
468
537
|
}
|
469
538
|
end
|
470
539
|
|
471
|
-
# Generate candidate configurations from proposals and
|
540
|
+
# Generate candidate configurations from proposals and demo candidates
|
472
541
|
sig do
|
473
542
|
params(
|
474
543
|
proposal_result: DSPy::Propose::GroundedProposer::ProposalResult,
|
475
|
-
|
544
|
+
demo_candidates: T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]]
|
476
545
|
).returns(T::Array[EvaluatedCandidate])
|
477
546
|
end
|
478
|
-
def generate_candidate_configurations(proposal_result,
|
547
|
+
# Builds the ordered list of candidate configurations to evaluate:
#   1. one baseline (no instruction, no demos),
#   2. one instruction-only candidate per instruction combination,
#   3. one few-shot-only candidate per non-empty demo combination,
#   4. instruction x demo pairs, using only the first three demo combinations.
#
# Per-predictor instructions come from proposal_result.predictor_instructions
# when that reader exists and is non-empty; otherwise the flat
# candidate_instructions list is attributed to predictor 0.
def generate_candidate_configurations(proposal_result, demo_candidates)
  per_predictor_instructions =
    if proposal_result.respond_to?(:predictor_instructions) && proposal_result.predictor_instructions.any?
      proposal_result.predictor_instructions
    else
      { 0 => proposal_result.candidate_instructions }
    end

  instruction_maps = build_instruction_maps(per_predictor_instructions)
  demo_maps = build_demo_maps(demo_candidates)

  # The candidate-level instruction is predictor 0's, else the first one present.
  headline_instruction = ->(map) { map[0] || map.values.first || "" }

  # Every candidate gets its own random config_id at construction time.
  new_candidate = lambda do |instruction, examples, type, metadata|
    EvaluatedCandidate.new(
      instruction: instruction,
      few_shot_examples: examples,
      type: type,
      metadata: metadata,
      config_id: SecureRandom.hex(6)
    )
  end

  baseline = new_candidate.call("", [], CandidateType::Baseline, { instructions_map: {}, demos_map: {} })

  instruction_only = instruction_maps.each_with_index.map do |instruction_map, rank|
    new_candidate.call(
      headline_instruction.call(instruction_map),
      [],
      CandidateType::InstructionOnly,
      {
        proposal_rank: rank,
        instructions_map: duplicate_instruction_map(instruction_map),
        demos_map: {}
      }
    )
  end

  # Ranks keep their position in demo_maps even when empty maps are skipped.
  few_shot_only = demo_maps.each_with_index.reject { |demo_map, _| demo_map.empty? }.map do |demo_map, rank|
    new_candidate.call(
      "",
      demo_map.values.flatten,
      CandidateType::FewShotOnly,
      {
        bootstrap_rank: rank,
        instructions_map: {},
        demos_map: duplicate_demo_map(demo_map)
      }
    )
  end

  leading_demo_maps = demo_maps.first(3)
  combined = instruction_maps.each_with_index.flat_map do |instruction_map, instruction_rank|
    instruction = headline_instruction.call(instruction_map)

    leading_demo_maps.each_with_index.reject { |demo_map, _| demo_map.empty? }.map do |demo_map, demo_rank|
      new_candidate.call(
        instruction,
        demo_map.values.flatten,
        CandidateType::Combined,
        {
          instruction_rank: instruction_rank,
          bootstrap_rank: demo_rank,
          instructions_map: duplicate_instruction_map(instruction_map),
          demos_map: duplicate_demo_map(demo_map)
        }
      )
    end
  end

  [baseline, *instruction_only, *few_shot_only, *combined]
end
|
533
627
|
|
628
|
+
sig { params(predictor_instruction_map: T::Hash[Integer, T::Array[String]]).returns(T::Array[T::Hash[Integer, String]]) }
|
629
|
+
# Expands per-predictor instruction lists into every combination of
# "one instruction per predictor".
#
# Predictors whose list is nil or empty are ignored and each remaining list is
# capped at its first three entries. When nothing usable remains (including a
# nil or empty input) the result is a single empty map, [{}].
def build_instruction_maps(predictor_instruction_map)
  usable = (predictor_instruction_map || {})
           .reject { |_, instructions| instructions.nil? || instructions.empty? }
           .transform_values { |instructions| instructions.take(3) }

  usable.empty? ? [{}] : cartesian_product(usable)
end
|
641
|
+
|
642
|
+
sig do
|
643
|
+
params(demo_candidates: T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]]).returns(T::Array[T::Hash[Integer, T::Array[DSPy::FewShotExample]]])
|
644
|
+
end
|
645
|
+
# Expands per-predictor demo sets into every combination of
# "one demo set per predictor".
#
# Predictors with nil or empty set lists are ignored and each remaining list
# is capped at its first three sets. A nil/empty input, or one with no usable
# predictor, yields a single empty map, [{}].
def build_demo_maps(demo_candidates)
  usable = (demo_candidates || {})
           .reject { |_, sets| sets.nil? || sets.empty? }
           .transform_values { |sets| sets.take(3) }

  usable.empty? ? [{}] : cartesian_product(usable)
end
|
657
|
+
|
658
|
+
sig do
|
659
|
+
params(options_hash: T::Hash[Integer, T::Array[T.untyped]]).returns(T::Array[T::Hash[Integer, T.untyped]])
|
660
|
+
end
|
661
|
+
# Returns every way of picking one value per key from +options_hash+.
#
# Keys are processed in ascending order, so combinations vary fastest on the
# largest key. Keys whose value list is nil or empty are left out of the
# combinations entirely. An empty input yields [{}].
def cartesian_product(options_hash)
  combinations = [{}]

  options_hash.sort_by { |index, _| index }.each do |index, values|
    next if values.nil? || values.empty?

    # Extend every partial combination with each possible value for this key.
    combinations = combinations.flat_map do |partial|
      values.map { |value| partial.merge(index => value) }
    end
  end

  combinations
end
|
672
|
+
|
673
|
+
sig { params(instruction_map: T::Hash[Integer, String]).returns(T::Hash[Integer, String]) }
|
674
|
+
# Returns a new hash with the same keys as +instruction_map+ in which every
# String value has been dup'ed; non-String values are carried over as-is.
def duplicate_instruction_map(instruction_map)
  instruction_map.transform_values do |instruction|
    instruction.is_a?(String) ? instruction.dup : instruction
  end
end
|
679
|
+
|
680
|
+
sig do
|
681
|
+
params(demo_map: T::Hash[Integer, T::Array[DSPy::FewShotExample]]).returns(T::Hash[Integer, T::Array[DSPy::FewShotExample]])
|
682
|
+
end
|
683
|
+
# Returns a new hash mapping each key of +demo_map+ to a fresh array holding
# the same demo objects (the demos themselves are not copied). Entries whose
# value is nil are dropped.
def duplicate_demo_map(demo_map)
  demo_map.compact.transform_values { |demos| demos.map(&:itself) }
end
|
689
|
+
|
690
|
+
sig { params(examples: T::Array[T.untyped]).returns(T::Array[DSPy::FewShotExample]) }
|
691
|
+
# Converts DSPy::Example entries into DSPy::FewShotExample objects.
#
# Entries that already are FewShotExamples, and entries of any other type,
# are passed through untouched. For a DSPy::Example the new object takes its
# input from input_values, its output from expected_values and its reasoning
# from extract_reasoning_from_example.
def normalize_few_shot_examples(examples)
  examples.map do |example|
    # The FewShotExample check comes first, exactly as in the original branch order.
    next example if example.is_a?(DSPy::FewShotExample) || !example.is_a?(DSPy::Example)

    DSPy::FewShotExample.new(
      input: example.input_values,
      output: example.expected_values,
      reasoning: extract_reasoning_from_example(example)
    )
  end
end
|
706
|
+
|
707
|
+
sig { params(predictor: T.untyped, examples: T::Array[DSPy::FewShotExample]).void }
|
708
|
+
# Attaches +examples+ to +predictor+ through whichever hooks it exposes.
#
# If the predictor has a demos= writer it receives the array itself. If it
# has a prompt reader, the prompt's @few_shot_examples instance variable is
# overwritten with a frozen shallow copy of the array.
def assign_predictor_examples(predictor, examples)
  predictor.demos = examples if predictor.respond_to?(:demos=)

  if predictor.respond_to?(:prompt)
    # Freeze a copy rather than the caller's array.
    frozen_copy = examples.map(&:itself).freeze
    predictor.prompt.instance_variable_set(:@few_shot_examples, frozen_copy)
  end
end
|
715
|
+
|
534
716
|
# Initialize optimization state for candidate selection
|
535
717
|
sig { params(candidates: T::Array[EvaluatedCandidate]).returns(T::Hash[Symbol, T.untyped]) }
|
536
718
|
def initialize_optimization_state(candidates)
|
@@ -685,10 +867,10 @@ module DSPy
|
|
685
867
|
features << ((config_hash / 1000) % 1000).to_f / 1000.0 # Feature 2: different part of hash
|
686
868
|
features << ((config_hash / 1_000_000) % 1000).to_f / 1000.0 # Feature 3: high bits
|
687
869
|
|
688
|
-
# Add instruction length if available
|
870
|
+
# Add instruction length if available (Python-compatible: no cap)
|
689
871
|
instruction = candidate.instruction
|
690
872
|
if instruction && !instruction.empty?
|
691
|
-
features <<
|
873
|
+
features << instruction.length.to_f / 100.0 # Instruction length, uncapped
|
692
874
|
else
|
693
875
|
features << 0.5 # Default value
|
694
876
|
end
|
@@ -710,7 +892,11 @@ module DSPy
|
|
710
892
|
modified_program = apply_candidate_configuration(program, candidate)
|
711
893
|
|
712
894
|
# Evaluate modified program
|
713
|
-
evaluation_result =
|
895
|
+
evaluation_result = if use_concurrent_evaluation?(evaluation_set)
|
896
|
+
evaluate_candidate_concurrently(modified_program, evaluation_set)
|
897
|
+
else
|
898
|
+
evaluate_program(modified_program, evaluation_set)
|
899
|
+
end
|
714
900
|
|
715
901
|
# Store evaluation details
|
716
902
|
@evaluated_candidates << candidate
|
@@ -718,26 +904,131 @@ module DSPy
|
|
718
904
|
[evaluation_result.pass_rate, modified_program, evaluation_result]
|
719
905
|
end
|
720
906
|
|
907
|
+
sig { params(evaluation_set: T::Array[DSPy::Example]).returns(T::Boolean) }
|
908
|
+
# Decides whether an evaluation set should be split into minibatches and
# evaluated on several threads.
#
# True only when config.minibatch_size is a positive number,
# config.num_threads is greater than 1, and the evaluation set is larger than
# one minibatch.
def use_concurrent_evaluation?(evaluation_set)
  batch_limit = config.minibatch_size
  return false unless batch_limit&.positive?

  thread_count = config.num_threads
  multi_threaded = thread_count ? thread_count > 1 : false

  multi_threaded && evaluation_set.size > batch_limit
end
|
915
|
+
|
916
|
+
sig do
|
917
|
+
params(
|
918
|
+
modified_program: T.untyped,
|
919
|
+
evaluation_set: T::Array[DSPy::Example]
|
920
|
+
).returns(DSPy::Evaluate::BatchEvaluationResult)
|
921
|
+
end
|
922
|
+
# Evaluates +modified_program+ over +evaluation_set+ in minibatches on a
# fixed-size thread pool and merges the per-batch results.
#
# The set is cut into slices of config.minibatch_size. With at most one slice
# the work is done by a plain evaluate_program call. Otherwise one future per
# slice is scheduled on a pool of min(config.num_threads, slice count)
# workers (at least one); value! re-raises a failed slice's error. The pool
# is always shut down and awaited before returning.
def evaluate_candidate_concurrently(modified_program, evaluation_set)
  slices = evaluation_set.each_slice(T.must(config.minibatch_size)).map(&:dup)
  return evaluate_program(modified_program, evaluation_set) if slices.size <= 1

  worker_count = [config.num_threads, slices.size].min
  worker_count = 1 unless worker_count.positive?
  executor = Concurrent::FixedThreadPool.new(worker_count)

  begin
    pending = slices.map do |slice|
      Concurrent::Promises.future_on(executor) { evaluate_program(modified_program, slice) }
    end

    combine_batch_results(pending.map(&:value!))
  ensure
    executor.shutdown
    executor.wait_for_termination
  end
end
|
945
|
+
|
946
|
+
sig do
|
947
|
+
params(batch_results: T::Array[DSPy::Evaluate::BatchEvaluationResult]).returns(DSPy::Evaluate::BatchEvaluationResult)
|
948
|
+
end
|
949
|
+
# Merges several BatchEvaluationResult objects into one.
#
# The merged result holds the concatenation of every batch's results, and its
# aggregated metrics come from merge_aggregated_metrics, which is given the
# summed total_examples. An empty input produces a result with no results and
# no metrics.
def combine_batch_results(batch_results)
  result_class = DSPy::Evaluate::BatchEvaluationResult
  return result_class.new(results: [], aggregated_metrics: {}) if batch_results.empty?

  example_count = batch_results.sum(&:total_examples)

  result_class.new(
    results: batch_results.flat_map(&:results),
    aggregated_metrics: merge_aggregated_metrics(batch_results, example_count)
  )
end
|
961
|
+
|
962
|
+
sig do
|
963
|
+
params(
|
964
|
+
batch_results: T::Array[DSPy::Evaluate::BatchEvaluationResult],
|
965
|
+
total_examples: Integer
|
966
|
+
).returns(T::Hash[Symbol, T.untyped])
|
967
|
+
end
|
968
|
+
# Merges the aggregated_metrics hashes of several batch results into one.
#
# batch_results  - objects responding to aggregated_metrics (a Hash) and
#                  total_examples (a number)
# total_examples - combined example count; zero short-circuits to {}
#
# For each metric key seen in any batch:
#   * numeric values are averaged, weighted by each batch's total_examples;
#   * otherwise the last non-nil, non-numeric value is kept as-is.
# A key is omitted only when no batch supplies a usable value for it.
def merge_aggregated_metrics(batch_results, total_examples)
  return {} if total_examples.zero?

  metric_keys = batch_results.flat_map { |res| res.aggregated_metrics.keys }.uniq

  metric_keys.each_with_object({}) do |key, merged|
    weighted_sum = 0.0
    weight = 0.0
    fallback_value = nil

    batch_results.each do |res|
      value = res.aggregated_metrics[key]
      next if value.nil?

      if value.is_a?(Numeric)
        weighted_sum += value.to_f * res.total_examples
        weight += res.total_examples
      else
        fallback_value = value
      end
    end

    if weight.positive?
      merged[key] = weighted_sum / weight
    elsif !fallback_value.nil?
      # Test for nil rather than truthiness so a literal `false` metric
      # (e.g. a boolean flag) is carried over instead of being dropped.
      merged[key] = fallback_value
    end
  end
end
|
996
|
+
|
721
997
|
# Apply candidate configuration to program
|
722
998
|
sig { params(program: T.untyped, candidate: EvaluatedCandidate).returns(T.untyped) }
|
723
999
|
def apply_candidate_configuration(program, candidate)
|
1000
|
+
instructions_map = candidate.metadata[:instructions_map] || {}
|
1001
|
+
demos_map = candidate.metadata[:demos_map] || {}
|
1002
|
+
|
724
1003
|
modified_program = program
|
725
|
-
|
726
|
-
|
727
|
-
|
1004
|
+
if modified_program.respond_to?(:predictors) && (instructions_map.any? || demos_map.any?)
|
1005
|
+
modified_program = modified_program.clone
|
1006
|
+
modified_program.predictors.each_with_index do |predictor, idx|
|
1007
|
+
if instructions_map.key?(idx)
|
1008
|
+
signature = Utils.get_signature(predictor)
|
1009
|
+
updated_signature = signature.with_instructions(instructions_map[idx])
|
1010
|
+
Utils.set_signature(predictor, updated_signature)
|
1011
|
+
end
|
1012
|
+
|
1013
|
+
if demos_map.key?(idx)
|
1014
|
+
normalized_examples = normalize_few_shot_examples(demos_map[idx])
|
1015
|
+
assign_predictor_examples(predictor, normalized_examples)
|
1016
|
+
end
|
1017
|
+
end
|
1018
|
+
end
|
1019
|
+
|
1020
|
+
# Apply instruction if provided (top-level programs still respect with_instruction)
|
1021
|
+
if !candidate.instruction.empty? && modified_program.respond_to?(:with_instruction)
|
728
1022
|
modified_program = modified_program.with_instruction(candidate.instruction)
|
729
1023
|
end
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
)
|
739
|
-
end
|
740
|
-
modified_program = modified_program.with_examples(few_shot_examples)
|
1024
|
+
|
1025
|
+
should_apply_global_examples = candidate.few_shot_examples.any? &&
|
1026
|
+
modified_program.respond_to?(:with_examples) &&
|
1027
|
+
(demos_map.empty? || !modified_program.respond_to?(:predictors))
|
1028
|
+
|
1029
|
+
if should_apply_global_examples
|
1030
|
+
normalized_few_shot = normalize_few_shot_examples(candidate.few_shot_examples)
|
1031
|
+
modified_program = modified_program.with_examples(normalized_few_shot)
|
741
1032
|
end
|
742
1033
|
|
743
1034
|
modified_program
|
@@ -779,48 +1070,66 @@ module DSPy
|
|
779
1070
|
state[:no_improvement_count] >= config.early_stopping_patience
|
780
1071
|
end
|
781
1072
|
|
782
|
-
# Calculate diversity score for candidate
|
1073
|
+
# Calculate diversity score for candidate (Python-compatible: only few-shot count)
|
783
1074
|
sig { params(candidate: EvaluatedCandidate).returns(Float) }
|
784
1075
|
# Diversity score in the range 0.0..1.0 based solely on how many few-shot
# examples the candidate carries: one tenth per example, capped at 1.0.
# Instruction length plays no part (per the source comment, this mirrors
# Python DSPy).
def calculate_diversity_score(candidate)
  demo_ratio = candidate.few_shot_examples.size / 10.0
  demo_ratio > 1.0 ? 1.0 : demo_ratio
end
|
791
1081
|
|
792
1082
|
# Build final MIPROv2 result
|
793
1083
|
sig do
|
794
1084
|
params(
|
795
1085
|
optimization_result: T::Hash[Symbol, T.untyped],
|
796
|
-
|
1086
|
+
demo_candidates: T::Hash[Integer, T::Array[T::Array[DSPy::FewShotExample]]],
|
797
1087
|
proposal_result: DSPy::Propose::GroundedProposer::ProposalResult
|
798
1088
|
).returns(MIPROv2Result)
|
799
1089
|
end
|
800
|
-
def build_miprov2_result(optimization_result,
|
1090
|
+
def build_miprov2_result(optimization_result, demo_candidates, proposal_result)
|
801
1091
|
best_candidate = optimization_result[:best_candidate]
|
802
1092
|
best_program = optimization_result[:best_program]
|
803
1093
|
best_score = optimization_result[:best_score]
|
804
1094
|
best_evaluation_result = optimization_result[:best_evaluation_result]
|
805
|
-
|
1095
|
+
|
806
1096
|
scores = { pass_rate: best_score }
|
807
|
-
|
1097
|
+
|
808
1098
|
history = {
|
809
1099
|
total_trials: optimization_result[:trials_completed],
|
810
|
-
optimization_strategy:
|
1100
|
+
optimization_strategy: optimization_strategy_name,
|
811
1101
|
early_stopped: optimization_result[:trials_completed] < config.num_trials,
|
812
|
-
score_history: optimization_result[:optimization_state][:best_score_history]
|
1102
|
+
score_history: optimization_result[:optimization_state][:best_score_history],
|
1103
|
+
total_eval_calls: optimization_result[:total_eval_calls]
|
813
1104
|
}
|
814
|
-
|
1105
|
+
|
815
1106
|
metadata = {
|
816
1107
|
optimizer: "MIPROv2",
|
817
1108
|
auto_mode: infer_auto_mode,
|
1109
|
+
optimization_strategy: optimization_strategy_name,
|
818
1110
|
best_instruction: best_candidate&.instruction || "",
|
819
1111
|
best_few_shot_count: best_candidate&.few_shot_examples&.size || 0,
|
820
1112
|
best_candidate_type: best_candidate&.type&.serialize || "unknown",
|
821
1113
|
optimization_timestamp: Time.now.iso8601
|
822
1114
|
}
|
823
|
-
|
1115
|
+
|
1116
|
+
# Create bootstrap statistics from demo_candidates
|
1117
|
+
num_predictors = demo_candidates.keys.size
|
1118
|
+
sets_per_predictor = demo_candidates.values.map(&:size)
|
1119
|
+
all_demo_sets = demo_candidates.values.flat_map { |sets| sets }
|
1120
|
+
bootstrap_statistics = {
|
1121
|
+
num_predictors: num_predictors,
|
1122
|
+
demo_sets_per_predictor: sets_per_predictor.max || 0,
|
1123
|
+
avg_demos_per_set: all_demo_sets.empty? ? 0 : all_demo_sets.map(&:size).sum.to_f / all_demo_sets.size
|
1124
|
+
}
|
1125
|
+
bootstrap_statistics[:per_predictor_demo_counts] = sets_per_predictor if sets_per_predictor.any?
|
1126
|
+
|
1127
|
+
optimization_trace = serialize_optimization_trace(optimization_result[:optimization_state])
|
1128
|
+
optimization_trace[:trial_logs] = serialize_trial_logs(optimization_result[:trial_logs])
|
1129
|
+
optimization_trace[:param_score_dict] = serialize_param_score_dict(optimization_result[:param_score_dict])
|
1130
|
+
optimization_trace[:fully_evaled_param_combos] = serialize_fully_evaled_param_combos(optimization_result[:fully_evaled_param_combos])
|
1131
|
+
optimization_trace[:total_eval_calls] = optimization_result[:total_eval_calls]
|
1132
|
+
|
824
1133
|
MIPROv2Result.new(
|
825
1134
|
optimized_program: best_program,
|
826
1135
|
scores: scores,
|
@@ -829,8 +1138,8 @@ module DSPy
|
|
829
1138
|
best_score_value: best_score,
|
830
1139
|
metadata: metadata,
|
831
1140
|
evaluated_candidates: @evaluated_candidates,
|
832
|
-
optimization_trace:
|
833
|
-
bootstrap_statistics:
|
1141
|
+
optimization_trace: optimization_trace,
|
1142
|
+
bootstrap_statistics: bootstrap_statistics,
|
834
1143
|
proposal_statistics: proposal_result.analysis,
|
835
1144
|
best_evaluation_result: best_evaluation_result
|
836
1145
|
)
|
@@ -851,7 +1160,205 @@ module DSPy
|
|
851
1160
|
serialized_trace
|
852
1161
|
end
|
853
1162
|
|
1163
|
+
sig do
|
1164
|
+
params(
|
1165
|
+
trial_number: Integer,
|
1166
|
+
candidate: EvaluatedCandidate,
|
1167
|
+
evaluation_type: Symbol,
|
1168
|
+
batch_size: Integer
|
1169
|
+
).returns(T::Hash[Symbol, T.untyped])
|
1170
|
+
end
|
1171
|
+
# Builds the initial log entry for a trial that is about to be evaluated.
#
# The entry captures the candidate's id, serialized type, the first 160
# characters of its instruction, its few-shot example count, a deep copy of
# its metadata, the evaluation settings, and a start timestamp. Status is
# always :in_progress.
#
# Instructions are taken from metadata[:instructions_map] when that map is
# non-empty; otherwise a non-empty candidate.instruction is filed under
# metadata[:predictor_index] (default 0). A non-empty metadata[:demos_map]
# is copied into :few_shot_map.
#
# @param trial_number [Integer] accepted but not stored in the entry
# @param candidate [EvaluatedCandidate] candidate being evaluated
# @param evaluation_type [Symbol] stored as-is under :evaluation_type
# @param batch_size [Integer] stored as-is under :batch_size
# @return [Hash{Symbol => Object}] the freshly built log entry
def create_trial_log_entry(trial_number:, candidate:, evaluation_type:, batch_size:)
  # The trial number is deliberately unused here: the caller keys the trial
  # log by it, so it is only accepted to keep the interface stable.
  _ = trial_number

  predictor_instructions = candidate.metadata[:instructions_map] || {}
  predictor_demos = candidate.metadata[:demos_map] || {}

  # Keys are inserted one by one in the order consumers will see them.
  log_entry = {}
  log_entry[:candidate_id] = candidate.config_id
  log_entry[:candidate_type] = candidate.type.serialize
  log_entry[:instruction_preview] = candidate.instruction.to_s[0, 160]
  log_entry[:few_shot_count] = candidate.few_shot_examples.size
  log_entry[:metadata] = deep_dup(candidate.metadata)
  log_entry[:evaluation_type] = evaluation_type
  log_entry[:batch_size] = batch_size
  log_entry[:status] = :in_progress
  log_entry[:started_at] = Time.now.iso8601

  if predictor_instructions.any?
    copied_instructions = duplicate_instruction_map(predictor_instructions)
    log_entry[:instructions] = copied_instructions
    # Predictor 0 doubles as the "primary" instruction when it is present.
    log_entry[:instruction] = copied_instructions[0] if copied_instructions.key?(0)
  elsif candidate.instruction && !candidate.instruction.empty?
    slot = candidate.metadata[:predictor_index] || 0
    log_entry[:instruction] = candidate.instruction
    log_entry[:instructions] = { slot => candidate.instruction }
  end

  log_entry[:few_shot_map] = duplicate_demo_map(predictor_demos) if predictor_demos.any?
  log_entry
end
|
1198
|
+
|
1199
|
+
sig do
|
1200
|
+
params(
|
1201
|
+
trial_logs: T::Hash[Integer, T::Hash[Symbol, T.untyped]],
|
1202
|
+
trial_number: Integer,
|
1203
|
+
score: T.nilable(Float),
|
1204
|
+
evaluation_type: Symbol,
|
1205
|
+
batch_size: Integer,
|
1206
|
+
total_eval_calls: Integer,
|
1207
|
+
error: T.nilable(String)
|
1208
|
+
).void
|
1209
|
+
end
|
1210
|
+
# Marks a trial's log entry as finished and stores it back in +trial_logs+.
#
# The existing entry for +trial_number+ is updated in place; when there is
# none, a new hash is created. A truthy +score+ is written under :score.
# Status becomes :error (with the message under :error) when +error+ is
# given, otherwise :completed. A completion timestamp is always added.
#
# @param trial_logs [Hash{Integer => Hash}] trial log table, updated in place
# @param trial_number [Integer] key of the entry to finalize
# @param score [Float, nil] final score; omitted from the entry when nil
# @param evaluation_type [Symbol] overwrites :evaluation_type
# @param batch_size [Integer] overwrites :batch_size
# @param total_eval_calls [Integer] stored under :total_eval_calls
# @param error [String, nil] error message, if the trial failed
# @return [void]
def finalize_trial_log_entry(trial_logs, trial_number, score:, evaluation_type:, batch_size:, total_eval_calls:, error: nil)
  log_entry = trial_logs[trial_number]
  log_entry ||= {}

  log_entry[:score] = score if score
  log_entry.merge!(
    evaluation_type: evaluation_type,
    batch_size: batch_size,
    total_eval_calls: total_eval_calls
  )

  if error
    log_entry[:status] = :error
    log_entry[:error] = error
  else
    log_entry[:status] = :completed
  end

  log_entry[:completed_at] = Time.now.iso8601
  trial_logs[trial_number] = log_entry
end
|
1221
|
+
|
1222
|
+
sig do
|
1223
|
+
params(
|
1224
|
+
param_score_dict: T::Hash[String, T::Array[T::Hash[Symbol, T.untyped]]],
|
1225
|
+
candidate: EvaluatedCandidate,
|
1226
|
+
score: Float,
|
1227
|
+
evaluation_type: Symbol,
|
1228
|
+
instructions: T.nilable(T::Hash[Integer, String])
|
1229
|
+
).void
|
1230
|
+
end
|
1231
|
+
# Appends one score record for +candidate+ to +param_score_dict+.
#
# The record holds the candidate id, serialized type, score, evaluation
# type, a timestamp and a deep copy of the candidate's metadata. When no
# per-predictor instructions are supplied, a non-empty candidate.instruction
# is filed under metadata[:predictor_index] (default 0). The :instruction
# key carries predictor 0's instruction, falling back to
# candidate.instruction, and is omitted when that value is nil or empty.
#
# @param param_score_dict [Hash{String => Array<Hash>}] score history keyed
#   by config id, updated in place
# @param candidate [EvaluatedCandidate] candidate that was scored
# @param score [Float] score to record
# @param evaluation_type [Symbol] stored as-is under :evaluation_type
# @param instructions [Hash{Integer => String}, nil] per-predictor
#   instructions; copied, never mutated or retained
# @return [void]
def record_param_score(param_score_dict, candidate, score, evaluation_type:, instructions: nil)
  # Work on a private copy: the fallback below inserts into this hash and the
  # record keeps a reference to it, so using the caller's object directly
  # would mutate their argument and alias it into the stored history.
  instructions_hash = instructions ? instructions.dup : {}
  if instructions_hash.empty? && candidate.instruction && !candidate.instruction.empty?
    predictor_index = candidate.metadata[:predictor_index] || 0
    instructions_hash[predictor_index] = candidate.instruction
  end

  record = {
    candidate_id: candidate.config_id,
    candidate_type: candidate.type.serialize,
    score: score,
    evaluation_type: evaluation_type,
    timestamp: Time.now.iso8601,
    metadata: deep_dup(candidate.metadata)
  }
  primary_instruction = instructions_hash[0] || candidate.instruction
  record[:instruction] = primary_instruction if primary_instruction && !primary_instruction.empty?
  record[:instructions] = instructions_hash unless instructions_hash.empty?

  # `||=` keeps this working for dictionaries built without a default block.
  (param_score_dict[candidate.config_id] ||= []) << record
end
|
1252
|
+
|
1253
|
+
sig do
|
1254
|
+
params(
|
1255
|
+
fully_evaled_param_combos: T::Hash[String, T::Hash[Symbol, T.untyped]],
|
1256
|
+
candidate: EvaluatedCandidate,
|
1257
|
+
score: Float,
|
1258
|
+
instructions: T.nilable(T::Hash[Integer, String])
|
1259
|
+
).void
|
1260
|
+
end
|
1261
|
+
# Keeps the best-scoring record per candidate configuration.
#
# The record stored under candidate.config_id is replaced only when none
# exists yet or when +score+ is strictly greater than the stored :score.
# When no per-predictor instructions are supplied, a non-empty
# candidate.instruction is filed under metadata[:predictor_index]
# (default 0).
#
# @param fully_evaled_param_combos [Hash{String => Hash}] best-record table,
#   updated in place
# @param candidate [EvaluatedCandidate] candidate that was fully evaluated
# @param score [Float] score achieved by the candidate
# @param instructions [Hash{Integer => String}, nil] per-predictor
#   instructions; copied, never mutated or retained
# @return [void]
def update_fully_evaled_param_combos(fully_evaled_param_combos, candidate, score, instructions: nil)
  existing = fully_evaled_param_combos[candidate.config_id]
  return unless existing.nil? || score > existing[:score]

  # Copy before use: the fallback below inserts into this hash and the stored
  # record references it, so the caller's object must stay untouched and
  # must not be aliased into the table.
  instructions_hash = instructions ? instructions.dup : {}
  if instructions_hash.empty? && candidate.instruction && !candidate.instruction.empty?
    predictor_index = candidate.metadata[:predictor_index] || 0
    instructions_hash[predictor_index] = candidate.instruction
  end

  record = {
    candidate_id: candidate.config_id,
    candidate_type: candidate.type.serialize,
    score: score,
    metadata: deep_dup(candidate.metadata),
    updated_at: Time.now.iso8601
  }
  unless instructions_hash.empty?
    record[:instructions] = instructions_hash
    record[:instruction] = instructions_hash[0] || candidate.instruction
  end

  fully_evaled_param_combos[candidate.config_id] = record
end
|
1283
|
+
|
1284
|
+
sig { params(trial_logs: T.nilable(T::Hash[Integer, T::Hash[Symbol, T.untyped]])).returns(T::Hash[Integer, T::Hash[Symbol, T.untyped]]) }
|
1285
|
+
# Returns a copy of the trial logs restricted to a fixed set of keys.
#
# Each entry is rebuilt as a new hash containing only the permitted keys, in
# the entry's own order; values are carried over by reference.
#
# @param trial_logs [Hash{Integer => Hash}, nil] trial log table
# @return [Hash{Integer => Hash}] filtered copy, or {} when +trial_logs+ is nil
def serialize_trial_logs(trial_logs)
  return {} unless trial_logs

  permitted = %i[
    candidate_id candidate_type instruction_preview instruction instructions
    few_shot_count metadata evaluation_type batch_size score status error
    started_at completed_at total_eval_calls
  ]

  trial_logs.transform_values do |entry|
    entry.select { |key, _value| permitted.include?(key) }
  end
end
|
1312
|
+
|
1313
|
+
sig { params(param_score_dict: T.nilable(T::Hash[String, T::Array[T::Hash[Symbol, T.untyped]]])).returns(T::Hash[String, T::Array[T::Hash[Symbol, T.untyped]]]) }
|
1314
|
+
# Returns a copy of the per-candidate score history restricted to a fixed
# set of keys.
#
# Every record in every list is rebuilt as a new hash holding only the
# permitted keys, in the record's own order; values are carried over by
# reference.
#
# @param param_score_dict [Hash{String => Array<Hash>}, nil] score history
# @return [Hash{String => Array<Hash>}] filtered copy, or {} when the input is nil
def serialize_param_score_dict(param_score_dict)
  return {} unless param_score_dict

  permitted = %i[candidate_id candidate_type score evaluation_type timestamp metadata instruction instructions]

  param_score_dict.transform_values do |records|
    records.map do |record|
      record.select { |key, _value| permitted.include?(key) }
    end
  end
end
|
1327
|
+
|
1328
|
+
sig { params(fully_evaled_param_combos: T.nilable(T::Hash[String, T::Hash[Symbol, T.untyped]])).returns(T::Hash[String, T::Hash[Symbol, T.untyped]]) }
|
1329
|
+
# Returns a copy of the best-record table restricted to a fixed set of keys.
#
# Each record is rebuilt as a new hash holding only the permitted keys, in
# the record's own order; values are carried over by reference.
#
# @param fully_evaled_param_combos [Hash{String => Hash}, nil] best-record table
# @return [Hash{String => Hash}] filtered copy, or {} when the input is nil
def serialize_fully_evaled_param_combos(fully_evaled_param_combos)
  return {} unless fully_evaled_param_combos

  permitted = %i[candidate_id candidate_type score metadata updated_at instruction instructions]

  fully_evaled_param_combos.transform_values do |record|
    record.select { |key, _value| permitted.include?(key) }
  end
end
|
1340
|
+
|
1341
|
+
sig { params(value: T.untyped).returns(T.untyped) }
|
1342
|
+
# Recursively copies nested Hash/Array structures.
#
# Hashes and Arrays are rebuilt element by element, and String leaves are
# duplicated so that an in-place edit of a string in the copy (or in the
# original) cannot show through on the other side. Hash keys are reused
# as-is. Every other kind of value is returned by reference.
#
# @param value [Object] structure to copy
# @return [Object] the copy; a plain Hash or Array for those inputs
def deep_dup(value)
  case value
  when Hash
    value.each_with_object({}) { |(key, nested), copy| copy[key] = deep_dup(nested) }
  when Array
    value.map { |element| deep_dup(element) }
  when String
    # Strings are mutable; sharing them would let `<<`/`replace` on one side
    # silently rewrite the other.
    value.dup
  else
    value
  end
end
|
1352
|
+
|
854
1353
|
# Helper methods
|
1354
|
+
sig { returns(String) }
|
1355
|
+
# Name of the configured optimization strategy.
#
# Uses the strategy's own #serialize when it responds to it, otherwise falls
# back to #to_s.
#
# @return [String]
def optimization_strategy_name
  chosen = config.optimization_strategy
  chosen.respond_to?(:serialize) ? chosen.serialize : chosen.to_s
end
|
1361
|
+
|
855
1362
|
sig { params(program: T.untyped).returns(T.nilable(String)) }
|
856
1363
|
def extract_current_instruction(program)
|
857
1364
|
if program.respond_to?(:prompt) && program.prompt.respond_to?(:instruction)
|
@@ -864,6 +1371,23 @@ module DSPy
|
|
864
1371
|
end
|
865
1372
|
end
|
866
1373
|
|
1374
|
+
sig { params(program: T.untyped).returns(T::Hash[Integer, String]) }
|
1375
|
+
# Collects the current instruction of every predictor in +program+.
#
# When the program exposes #predictors, each predictor whose #prompt responds
# to #instruction contributes its non-nil instruction under its position in
# the list. Otherwise the single instruction returned by
# extract_current_instruction is stored under index 0, if there is one.
#
# @param program [Object] program to inspect
# @return [Hash{Integer => String}] instructions keyed by predictor index
def extract_program_instructions(program)
  unless program.respond_to?(:predictors)
    lone_instruction = extract_current_instruction(program)
    return lone_instruction ? { 0 => lone_instruction } : {}
  end

  program.predictors.each_with_index.each_with_object({}) do |(predictor, position), collected|
    next unless predictor.respond_to?(:prompt) && predictor.prompt.respond_to?(:instruction)

    text = predictor.prompt.instruction
    collected[position] = text if text
  end
end
|
1390
|
+
|
867
1391
|
sig { params(program: T.untyped).returns(T.nilable(T.class_of(DSPy::Signature))) }
|
868
1392
|
def extract_signature_class(program)
|
869
1393
|
program.respond_to?(:signature_class) ? program.signature_class : nil
|
@@ -896,4 +1420,4 @@ module DSPy
|
|
896
1420
|
end
|
897
1421
|
end
|
898
1422
|
end
|
899
|
-
end
|
1423
|
+
end
|