dspy 0.28.1 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -3
- data/lib/dspy/callbacks.rb +222 -0
- data/lib/dspy/chain_of_thought.rb +2 -1
- data/lib/dspy/code_act.rb +14 -1
- data/lib/dspy/datasets/ade.rb +90 -0
- data/lib/dspy/datasets.rb +8 -0
- data/lib/dspy/lm.rb +9 -12
- data/lib/dspy/mixins/struct_builder.rb +17 -25
- data/lib/dspy/module.rb +45 -1
- data/lib/dspy/observability/async_span_processor.rb +67 -93
- data/lib/dspy/observability.rb +43 -1
- data/lib/dspy/predict.rb +17 -0
- data/lib/dspy/prompt.rb +90 -20
- data/lib/dspy/propose/dataset_summary_generator.rb +210 -0
- data/lib/dspy/propose/grounded_proposer.rb +320 -66
- data/lib/dspy/re_act.rb +13 -0
- data/lib/dspy/reflection_lm.rb +36 -0
- data/lib/dspy/teleprompt/bootstrap_strategy.rb +26 -0
- data/lib/dspy/teleprompt/gepa.rb +448 -2803
- data/lib/dspy/teleprompt/mipro_v2.rb +624 -100
- data/lib/dspy/teleprompt/utils.rb +349 -42
- data/lib/dspy/version.rb +2 -2
- data/lib/dspy.rb +4 -2
- data/lib/gepa/api.rb +61 -0
- data/lib/gepa/core/engine.rb +226 -0
- data/lib/gepa/core/evaluation_batch.rb +26 -0
- data/lib/gepa/core/result.rb +92 -0
- data/lib/gepa/core/state.rb +231 -0
- data/lib/gepa/logging/experiment_tracker.rb +54 -0
- data/lib/gepa/logging/logger.rb +57 -0
- data/lib/gepa/logging.rb +9 -0
- data/lib/gepa/proposer/base.rb +27 -0
- data/lib/gepa/proposer/merge_proposer.rb +424 -0
- data/lib/gepa/proposer/reflective_mutation/base.rb +48 -0
- data/lib/gepa/proposer/reflective_mutation/reflective_mutation.rb +188 -0
- data/lib/gepa/strategies/batch_sampler.rb +91 -0
- data/lib/gepa/strategies/candidate_selector.rb +97 -0
- data/lib/gepa/strategies/component_selector.rb +57 -0
- data/lib/gepa/strategies/instruction_proposal.rb +120 -0
- data/lib/gepa/telemetry.rb +122 -0
- data/lib/gepa/utils/pareto.rb +119 -0
- data/lib/gepa.rb +21 -0
- metadata +59 -4
- data/lib/dspy/teleprompt/simple_optimizer.rb +0 -497
metadata
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dspy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.28.1
|
4
|
+
version: 0.29.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vicente Reig Rincón de Arellano
|
8
|
+
autorequire:
|
8
9
|
bindir: bin
|
9
10
|
cert_chain: []
|
10
|
-
date: 2025-10-
|
11
|
+
date: 2025-10-19 00:00:00.000000000 Z
|
11
12
|
dependencies:
|
12
13
|
- !ruby/object:Gem::Dependency
|
13
14
|
name: dry-configurable
|
@@ -51,6 +52,20 @@ dependencies:
|
|
51
52
|
- - "~>"
|
52
53
|
- !ruby/object:Gem::Version
|
53
54
|
version: '2.29'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: concurrent-ruby
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.3'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.3'
|
54
69
|
- !ruby/object:Gem::Dependency
|
55
70
|
name: openai
|
56
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -121,6 +136,20 @@ dependencies:
|
|
121
136
|
- - "~>"
|
122
137
|
- !ruby/object:Gem::Version
|
123
138
|
version: '0.3'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: sorbet-baml
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0.1'
|
146
|
+
type: :runtime
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0.1'
|
124
153
|
- !ruby/object:Gem::Dependency
|
125
154
|
name: numo-narray
|
126
155
|
requirement: !ruby/object:Gem::Requirement
|
@@ -189,9 +218,12 @@ extra_rdoc_files: []
|
|
189
218
|
files:
|
190
219
|
- README.md
|
191
220
|
- lib/dspy.rb
|
221
|
+
- lib/dspy/callbacks.rb
|
192
222
|
- lib/dspy/chain_of_thought.rb
|
193
223
|
- lib/dspy/code_act.rb
|
194
224
|
- lib/dspy/context.rb
|
225
|
+
- lib/dspy/datasets.rb
|
226
|
+
- lib/dspy/datasets/ade.rb
|
195
227
|
- lib/dspy/error_formatter.rb
|
196
228
|
- lib/dspy/errors.rb
|
197
229
|
- lib/dspy/evaluate.rb
|
@@ -239,8 +271,10 @@ files:
|
|
239
271
|
- lib/dspy/predict.rb
|
240
272
|
- lib/dspy/prediction.rb
|
241
273
|
- lib/dspy/prompt.rb
|
274
|
+
- lib/dspy/propose/dataset_summary_generator.rb
|
242
275
|
- lib/dspy/propose/grounded_proposer.rb
|
243
276
|
- lib/dspy/re_act.rb
|
277
|
+
- lib/dspy/reflection_lm.rb
|
244
278
|
- lib/dspy/registry/registry_manager.rb
|
245
279
|
- lib/dspy/registry/signature_registry.rb
|
246
280
|
- lib/dspy/schema_adapters.rb
|
@@ -248,10 +282,10 @@ files:
|
|
248
282
|
- lib/dspy/storage/program_storage.rb
|
249
283
|
- lib/dspy/storage/storage_manager.rb
|
250
284
|
- lib/dspy/structured_outputs_prompt.rb
|
285
|
+
- lib/dspy/teleprompt/bootstrap_strategy.rb
|
251
286
|
- lib/dspy/teleprompt/data_handler.rb
|
252
287
|
- lib/dspy/teleprompt/gepa.rb
|
253
288
|
- lib/dspy/teleprompt/mipro_v2.rb
|
254
|
-
- lib/dspy/teleprompt/simple_optimizer.rb
|
255
289
|
- lib/dspy/teleprompt/teleprompter.rb
|
256
290
|
- lib/dspy/teleprompt/utils.rb
|
257
291
|
- lib/dspy/tools.rb
|
@@ -264,10 +298,30 @@ files:
|
|
264
298
|
- lib/dspy/type_system/sorbet_json_schema.rb
|
265
299
|
- lib/dspy/utils/serialization.rb
|
266
300
|
- lib/dspy/version.rb
|
301
|
+
- lib/gepa.rb
|
302
|
+
- lib/gepa/api.rb
|
303
|
+
- lib/gepa/core/engine.rb
|
304
|
+
- lib/gepa/core/evaluation_batch.rb
|
305
|
+
- lib/gepa/core/result.rb
|
306
|
+
- lib/gepa/core/state.rb
|
307
|
+
- lib/gepa/logging.rb
|
308
|
+
- lib/gepa/logging/experiment_tracker.rb
|
309
|
+
- lib/gepa/logging/logger.rb
|
310
|
+
- lib/gepa/proposer/base.rb
|
311
|
+
- lib/gepa/proposer/merge_proposer.rb
|
312
|
+
- lib/gepa/proposer/reflective_mutation/base.rb
|
313
|
+
- lib/gepa/proposer/reflective_mutation/reflective_mutation.rb
|
314
|
+
- lib/gepa/strategies/batch_sampler.rb
|
315
|
+
- lib/gepa/strategies/candidate_selector.rb
|
316
|
+
- lib/gepa/strategies/component_selector.rb
|
317
|
+
- lib/gepa/strategies/instruction_proposal.rb
|
318
|
+
- lib/gepa/telemetry.rb
|
319
|
+
- lib/gepa/utils/pareto.rb
|
267
320
|
homepage: https://github.com/vicentereig/dspy.rb
|
268
321
|
licenses:
|
269
322
|
- MIT
|
270
323
|
metadata: {}
|
324
|
+
post_install_message:
|
271
325
|
rdoc_options: []
|
272
326
|
require_paths:
|
273
327
|
- lib
|
@@ -282,7 +336,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
282
336
|
- !ruby/object:Gem::Version
|
283
337
|
version: '0'
|
284
338
|
requirements: []
|
285
|
-
rubygems_version: 3.
|
339
|
+
rubygems_version: 3.0.3.1
|
340
|
+
signing_key:
|
286
341
|
specification_version: 4
|
287
342
|
summary: The Ruby framework for programming—rather than prompting—language models.
|
288
343
|
test_files: []
|
@@ -1,497 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'sorbet-runtime'
|
4
|
-
require_relative 'teleprompter'
|
5
|
-
require_relative 'utils'
|
6
|
-
require_relative '../propose/grounded_proposer'
|
7
|
-
|
8
|
-
module DSPy
|
9
|
-
module Teleprompt
|
10
|
-
# Simple optimization algorithm using random/grid search
|
11
|
-
# Uses grounded proposer for instruction generation and bootstrap for examples
|
12
|
-
class SimpleOptimizer < Teleprompter
|
13
|
-
extend T::Sig
|
14
|
-
|
15
|
-
# Configuration specific to simple optimization
|
16
|
-
class OptimizerConfig < Config
|
17
|
-
extend T::Sig
|
18
|
-
|
19
|
-
sig { returns(Integer) }
|
20
|
-
attr_accessor :num_trials
|
21
|
-
|
22
|
-
sig { returns(String) }
|
23
|
-
attr_accessor :search_strategy
|
24
|
-
|
25
|
-
sig { returns(T::Boolean) }
|
26
|
-
attr_accessor :use_instruction_optimization
|
27
|
-
|
28
|
-
sig { returns(T::Boolean) }
|
29
|
-
attr_accessor :use_few_shot_optimization
|
30
|
-
|
31
|
-
sig { returns(DSPy::Propose::GroundedProposer::Config) }
|
32
|
-
attr_accessor :proposer_config
|
33
|
-
|
34
|
-
sig { void }
|
35
|
-
def initialize
|
36
|
-
super
|
37
|
-
@num_trials = 10
|
38
|
-
@search_strategy = "random" # or "grid"
|
39
|
-
@use_instruction_optimization = true
|
40
|
-
@use_few_shot_optimization = true
|
41
|
-
@proposer_config = DSPy::Propose::GroundedProposer::Config.new
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
# Result of a single optimization trial
|
46
|
-
class TrialResult
|
47
|
-
extend T::Sig
|
48
|
-
|
49
|
-
sig { returns(Integer) }
|
50
|
-
attr_reader :trial_number
|
51
|
-
|
52
|
-
sig { returns(T.untyped) }
|
53
|
-
attr_reader :program
|
54
|
-
|
55
|
-
sig { returns(String) }
|
56
|
-
attr_reader :instruction
|
57
|
-
|
58
|
-
sig { returns(T::Array[T.untyped]) }
|
59
|
-
attr_reader :few_shot_examples
|
60
|
-
|
61
|
-
sig { returns(DSPy::Evaluate::BatchEvaluationResult) }
|
62
|
-
attr_reader :evaluation_result
|
63
|
-
|
64
|
-
sig { returns(Float) }
|
65
|
-
attr_reader :score
|
66
|
-
|
67
|
-
sig { returns(T::Hash[Symbol, T.untyped]) }
|
68
|
-
attr_reader :metadata
|
69
|
-
|
70
|
-
sig do
|
71
|
-
params(
|
72
|
-
trial_number: Integer,
|
73
|
-
program: T.untyped,
|
74
|
-
instruction: String,
|
75
|
-
few_shot_examples: T::Array[T.untyped],
|
76
|
-
evaluation_result: DSPy::Evaluate::BatchEvaluationResult,
|
77
|
-
score: Float,
|
78
|
-
metadata: T::Hash[Symbol, T.untyped]
|
79
|
-
).void
|
80
|
-
end
|
81
|
-
def initialize(trial_number:, program:, instruction:, few_shot_examples:, evaluation_result:, score:, metadata:)
|
82
|
-
@trial_number = trial_number
|
83
|
-
@program = program
|
84
|
-
@instruction = instruction
|
85
|
-
@few_shot_examples = few_shot_examples
|
86
|
-
@evaluation_result = evaluation_result
|
87
|
-
@score = score
|
88
|
-
@metadata = metadata.freeze
|
89
|
-
end
|
90
|
-
|
91
|
-
sig { returns(T::Boolean) }
|
92
|
-
def successful?
|
93
|
-
@score > 0.0
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
sig { returns(OptimizerConfig) }
|
98
|
-
attr_reader :optimizer_config
|
99
|
-
|
100
|
-
sig { returns(T.nilable(DSPy::Propose::GroundedProposer)) }
|
101
|
-
attr_reader :proposer
|
102
|
-
|
103
|
-
sig do
|
104
|
-
params(
|
105
|
-
metric: T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T.untyped)),
|
106
|
-
config: T.nilable(OptimizerConfig)
|
107
|
-
).void
|
108
|
-
end
|
109
|
-
def initialize(metric: nil, config: nil)
|
110
|
-
@optimizer_config = config || OptimizerConfig.new
|
111
|
-
super(metric: metric, config: @optimizer_config)
|
112
|
-
|
113
|
-
@proposer = if @optimizer_config.use_instruction_optimization
|
114
|
-
DSPy::Propose::GroundedProposer.new(config: @optimizer_config.proposer_config)
|
115
|
-
else
|
116
|
-
nil
|
117
|
-
end
|
118
|
-
end
|
119
|
-
|
120
|
-
# Main optimization method
|
121
|
-
sig do
|
122
|
-
params(
|
123
|
-
program: T.untyped,
|
124
|
-
trainset: T::Array[T.untyped],
|
125
|
-
valset: T.nilable(T::Array[T.untyped])
|
126
|
-
).returns(OptimizationResult)
|
127
|
-
end
|
128
|
-
def compile(program, trainset:, valset: nil)
|
129
|
-
validate_inputs(program, trainset, valset)
|
130
|
-
|
131
|
-
instrument_step('compile', {
|
132
|
-
trainset_size: trainset.size,
|
133
|
-
valset_size: valset&.size || 0,
|
134
|
-
num_trials: @optimizer_config.num_trials,
|
135
|
-
search_strategy: @optimizer_config.search_strategy
|
136
|
-
}) do
|
137
|
-
# Convert examples to typed format
|
138
|
-
typed_trainset = ensure_typed_examples(trainset)
|
139
|
-
typed_valset = valset ? ensure_typed_examples(valset) : nil
|
140
|
-
|
141
|
-
# Use validation set if available, otherwise use part of training set
|
142
|
-
evaluation_set = typed_valset || typed_trainset.take(10)
|
143
|
-
|
144
|
-
# Bootstrap few-shot examples if enabled
|
145
|
-
bootstrap_result = nil
|
146
|
-
if @optimizer_config.use_few_shot_optimization
|
147
|
-
bootstrap_result = bootstrap_examples(program, typed_trainset)
|
148
|
-
end
|
149
|
-
|
150
|
-
# Generate instruction candidates if enabled
|
151
|
-
instruction_candidates = []
|
152
|
-
if @optimizer_config.use_instruction_optimization && @proposer
|
153
|
-
instruction_candidates = generate_instruction_candidates(program, typed_trainset, bootstrap_result)
|
154
|
-
end
|
155
|
-
|
156
|
-
# Run optimization trials
|
157
|
-
trials = run_optimization_trials(
|
158
|
-
program,
|
159
|
-
evaluation_set,
|
160
|
-
instruction_candidates,
|
161
|
-
bootstrap_result
|
162
|
-
)
|
163
|
-
|
164
|
-
# Find best trial
|
165
|
-
best_trial = find_best_trial(trials)
|
166
|
-
|
167
|
-
# Build optimization result
|
168
|
-
optimization_result = build_optimization_result(best_trial, trials)
|
169
|
-
|
170
|
-
save_results(optimization_result)
|
171
|
-
optimization_result
|
172
|
-
end
|
173
|
-
end
|
174
|
-
|
175
|
-
private
|
176
|
-
|
177
|
-
# Bootstrap few-shot examples from training set
|
178
|
-
sig { params(program: T.untyped, trainset: T::Array[DSPy::Example]).returns(Utils::BootstrapResult) }
|
179
|
-
def bootstrap_examples(program, trainset)
|
180
|
-
bootstrap_config = Utils::BootstrapConfig.new
|
181
|
-
bootstrap_config.max_bootstrapped_examples = @optimizer_config.max_bootstrapped_examples
|
182
|
-
bootstrap_config.max_labeled_examples = @optimizer_config.max_labeled_examples
|
183
|
-
bootstrap_config.num_candidate_sets = [@optimizer_config.num_trials / 2, 5].max
|
184
|
-
bootstrap_config.max_errors = @optimizer_config.max_errors
|
185
|
-
bootstrap_config.num_threads = @optimizer_config.num_threads
|
186
|
-
|
187
|
-
Utils.create_n_fewshot_demo_sets(program, trainset, config: bootstrap_config, metric: @metric)
|
188
|
-
end
|
189
|
-
|
190
|
-
# Generate instruction candidates using the proposer
|
191
|
-
sig do
|
192
|
-
params(
|
193
|
-
program: T.untyped,
|
194
|
-
trainset: T::Array[DSPy::Example],
|
195
|
-
bootstrap_result: T.nilable(Utils::BootstrapResult)
|
196
|
-
).returns(T::Array[String])
|
197
|
-
end
|
198
|
-
def generate_instruction_candidates(program, trainset, bootstrap_result)
|
199
|
-
return [] unless @proposer
|
200
|
-
|
201
|
-
# Get current instruction if available
|
202
|
-
current_instruction = extract_current_instruction(program)
|
203
|
-
|
204
|
-
# Use few-shot examples from bootstrap if available
|
205
|
-
few_shot_examples = bootstrap_result&.successful_examples&.take(5)
|
206
|
-
|
207
|
-
# Get signature class from program
|
208
|
-
signature_class = extract_signature_class(program)
|
209
|
-
return [] unless signature_class
|
210
|
-
|
211
|
-
proposal_result = @proposer.propose_instructions(
|
212
|
-
signature_class,
|
213
|
-
trainset,
|
214
|
-
few_shot_examples: few_shot_examples,
|
215
|
-
current_instruction: current_instruction
|
216
|
-
)
|
217
|
-
|
218
|
-
proposal_result.candidate_instructions
|
219
|
-
end
|
220
|
-
|
221
|
-
# Run optimization trials with different configurations
|
222
|
-
sig do
|
223
|
-
params(
|
224
|
-
program: T.untyped,
|
225
|
-
evaluation_set: T::Array[DSPy::Example],
|
226
|
-
instruction_candidates: T::Array[String],
|
227
|
-
bootstrap_result: T.nilable(Utils::BootstrapResult)
|
228
|
-
).returns(T::Array[TrialResult])
|
229
|
-
end
|
230
|
-
def run_optimization_trials(program, evaluation_set, instruction_candidates, bootstrap_result)
|
231
|
-
trials = []
|
232
|
-
|
233
|
-
# Generate trial configurations
|
234
|
-
trial_configs = generate_trial_configurations(instruction_candidates, bootstrap_result)
|
235
|
-
|
236
|
-
trial_configs.take(@optimizer_config.num_trials).each_with_index do |config, index|
|
237
|
-
trial_number = index + 1
|
238
|
-
|
239
|
-
emit_event('trial_start', {
|
240
|
-
trial_number: trial_number,
|
241
|
-
instruction: config[:instruction],
|
242
|
-
num_few_shot: config[:few_shot_examples]&.size || 0
|
243
|
-
})
|
244
|
-
|
245
|
-
begin
|
246
|
-
trial_result = run_single_trial(program, evaluation_set, config, trial_number)
|
247
|
-
trials << trial_result
|
248
|
-
|
249
|
-
emit_event('trial_complete', {
|
250
|
-
trial_number: trial_number,
|
251
|
-
score: trial_result.score,
|
252
|
-
successful: trial_result.successful?,
|
253
|
-
duration_ms: trial_result.metadata[:duration_ms] || 0
|
254
|
-
})
|
255
|
-
rescue => error
|
256
|
-
emit_event('error', {
|
257
|
-
trial_number: trial_number,
|
258
|
-
error_type: error.class.name,
|
259
|
-
error_message: error.message
|
260
|
-
})
|
261
|
-
|
262
|
-
DSPy.logger.error("Trial #{trial_number} failed: #{error.message}")
|
263
|
-
end
|
264
|
-
end
|
265
|
-
|
266
|
-
trials
|
267
|
-
end
|
268
|
-
|
269
|
-
# Generate configurations for trials
|
270
|
-
sig do
|
271
|
-
params(
|
272
|
-
instruction_candidates: T::Array[String],
|
273
|
-
bootstrap_result: T.nilable(Utils::BootstrapResult)
|
274
|
-
).returns(T::Array[T::Hash[Symbol, T.untyped]])
|
275
|
-
end
|
276
|
-
def generate_trial_configurations(instruction_candidates, bootstrap_result)
|
277
|
-
configs = []
|
278
|
-
|
279
|
-
# Base configuration (no changes)
|
280
|
-
configs << { instruction: nil, few_shot_examples: [] }
|
281
|
-
|
282
|
-
# Instruction-only trials
|
283
|
-
instruction_candidates.each do |instruction|
|
284
|
-
configs << { instruction: instruction, few_shot_examples: [] }
|
285
|
-
end
|
286
|
-
|
287
|
-
# Few-shot only trials
|
288
|
-
if bootstrap_result&.candidate_sets&.any?
|
289
|
-
bootstrap_result.candidate_sets.each do |candidate_set|
|
290
|
-
configs << { instruction: nil, few_shot_examples: candidate_set }
|
291
|
-
end
|
292
|
-
end
|
293
|
-
|
294
|
-
# Combined instruction + few-shot trials
|
295
|
-
if instruction_candidates.any? && bootstrap_result&.candidate_sets&.any?
|
296
|
-
instruction_candidates.take(3).each do |instruction|
|
297
|
-
bootstrap_result.candidate_sets.take(2).each do |candidate_set|
|
298
|
-
configs << { instruction: instruction, few_shot_examples: candidate_set }
|
299
|
-
end
|
300
|
-
end
|
301
|
-
end
|
302
|
-
|
303
|
-
# Shuffle for random strategy
|
304
|
-
if @optimizer_config.search_strategy == "random"
|
305
|
-
configs.shuffle
|
306
|
-
else
|
307
|
-
configs
|
308
|
-
end
|
309
|
-
end
|
310
|
-
|
311
|
-
# Run a single optimization trial
|
312
|
-
sig do
|
313
|
-
params(
|
314
|
-
program: T.untyped,
|
315
|
-
evaluation_set: T::Array[DSPy::Example],
|
316
|
-
config: T::Hash[Symbol, T.untyped],
|
317
|
-
trial_number: Integer
|
318
|
-
).returns(TrialResult)
|
319
|
-
end
|
320
|
-
def run_single_trial(program, evaluation_set, config, trial_number)
|
321
|
-
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
322
|
-
|
323
|
-
# Create modified program
|
324
|
-
modified_program = apply_trial_configuration(program, config)
|
325
|
-
|
326
|
-
# Evaluate the modified program
|
327
|
-
evaluation_result = evaluate_program(modified_program, evaluation_set)
|
328
|
-
|
329
|
-
# Calculate score (using pass_rate as primary metric)
|
330
|
-
score = evaluation_result.pass_rate
|
331
|
-
|
332
|
-
end_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
333
|
-
duration_ms = ((end_time - start_time) * 1000).round(2)
|
334
|
-
|
335
|
-
metadata = {
|
336
|
-
duration_ms: duration_ms,
|
337
|
-
num_examples_evaluated: evaluation_result.total_examples,
|
338
|
-
instruction_length: config[:instruction]&.length || 0,
|
339
|
-
num_few_shot_examples: config[:few_shot_examples]&.size || 0
|
340
|
-
}
|
341
|
-
|
342
|
-
TrialResult.new(
|
343
|
-
trial_number: trial_number,
|
344
|
-
program: modified_program,
|
345
|
-
instruction: config[:instruction] || "",
|
346
|
-
few_shot_examples: config[:few_shot_examples] || [],
|
347
|
-
evaluation_result: evaluation_result,
|
348
|
-
score: score,
|
349
|
-
metadata: metadata
|
350
|
-
)
|
351
|
-
end
|
352
|
-
|
353
|
-
# Apply trial configuration to program
|
354
|
-
sig { params(program: T.untyped, config: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
|
355
|
-
def apply_trial_configuration(program, config)
|
356
|
-
modified_program = program
|
357
|
-
|
358
|
-
# Apply instruction modification
|
359
|
-
if config[:instruction] && respond_to_instruction_modification?(program)
|
360
|
-
modified_program = apply_instruction_modification(modified_program, config[:instruction])
|
361
|
-
end
|
362
|
-
|
363
|
-
# Apply few-shot examples
|
364
|
-
if config[:few_shot_examples]&.any? && respond_to_few_shot_modification?(program)
|
365
|
-
modified_program = apply_few_shot_modification(modified_program, config[:few_shot_examples])
|
366
|
-
end
|
367
|
-
|
368
|
-
modified_program
|
369
|
-
end
|
370
|
-
|
371
|
-
# Apply instruction modification to program
|
372
|
-
sig { params(program: T.untyped, instruction: String).returns(T.untyped) }
|
373
|
-
def apply_instruction_modification(program, instruction)
|
374
|
-
if program.respond_to?(:with_instruction)
|
375
|
-
program.with_instruction(instruction)
|
376
|
-
else
|
377
|
-
program
|
378
|
-
end
|
379
|
-
end
|
380
|
-
|
381
|
-
# Apply few-shot examples to program
|
382
|
-
sig { params(program: T.untyped, examples: T::Array[T.untyped]).returns(T.untyped) }
|
383
|
-
def apply_few_shot_modification(program, examples)
|
384
|
-
if program.respond_to?(:with_examples)
|
385
|
-
# Convert to FewShotExample format
|
386
|
-
few_shot_examples = examples.map do |example|
|
387
|
-
DSPy::FewShotExample.new(
|
388
|
-
input: example.input_values,
|
389
|
-
output: example.expected_values,
|
390
|
-
reasoning: extract_reasoning_from_example(example)
|
391
|
-
)
|
392
|
-
end
|
393
|
-
program.with_examples(few_shot_examples)
|
394
|
-
else
|
395
|
-
program
|
396
|
-
end
|
397
|
-
end
|
398
|
-
|
399
|
-
# Find the best trial based on score
|
400
|
-
sig { params(trials: T::Array[TrialResult]).returns(T.nilable(TrialResult)) }
|
401
|
-
def find_best_trial(trials)
|
402
|
-
return nil if trials.empty?
|
403
|
-
|
404
|
-
trials.max_by(&:score)
|
405
|
-
end
|
406
|
-
|
407
|
-
# Build the final optimization result
|
408
|
-
sig { params(best_trial: T.nilable(TrialResult), all_trials: T::Array[TrialResult]).returns(OptimizationResult) }
|
409
|
-
def build_optimization_result(best_trial, all_trials)
|
410
|
-
if best_trial
|
411
|
-
scores = { pass_rate: best_trial.score }
|
412
|
-
history = {
|
413
|
-
total_trials: all_trials.size,
|
414
|
-
successful_trials: all_trials.count(&:successful?),
|
415
|
-
trial_scores: all_trials.map(&:score),
|
416
|
-
best_trial_number: best_trial.trial_number
|
417
|
-
}
|
418
|
-
|
419
|
-
OptimizationResult.new(
|
420
|
-
optimized_program: best_trial.program,
|
421
|
-
scores: scores,
|
422
|
-
history: history,
|
423
|
-
best_score_name: "pass_rate",
|
424
|
-
best_score_value: best_trial.score,
|
425
|
-
metadata: {
|
426
|
-
optimizer: "SimpleOptimizer",
|
427
|
-
search_strategy: @optimizer_config.search_strategy,
|
428
|
-
num_trials: @optimizer_config.num_trials,
|
429
|
-
best_instruction: best_trial.instruction,
|
430
|
-
best_num_few_shot: best_trial.few_shot_examples.size,
|
431
|
-
optimization_timestamp: Time.now.iso8601
|
432
|
-
}
|
433
|
-
)
|
434
|
-
else
|
435
|
-
# No successful trials
|
436
|
-
OptimizationResult.new(
|
437
|
-
optimized_program: nil,
|
438
|
-
scores: { pass_rate: 0.0 },
|
439
|
-
history: { total_trials: all_trials.size, successful_trials: 0 },
|
440
|
-
best_score_name: "pass_rate",
|
441
|
-
best_score_value: 0.0,
|
442
|
-
metadata: { optimizer: "SimpleOptimizer", error: "No successful trials" }
|
443
|
-
)
|
444
|
-
end
|
445
|
-
end
|
446
|
-
|
447
|
-
# Helper methods for program introspection
|
448
|
-
sig { params(program: T.untyped).returns(T.nilable(String)) }
|
449
|
-
def extract_current_instruction(program)
|
450
|
-
if program.respond_to?(:prompt) && program.prompt.respond_to?(:instruction)
|
451
|
-
program.prompt.instruction
|
452
|
-
elsif program.respond_to?(:system_signature)
|
453
|
-
# Try to extract from system signature
|
454
|
-
system_sig = program.system_signature
|
455
|
-
system_sig.is_a?(String) ? system_sig : nil
|
456
|
-
else
|
457
|
-
nil
|
458
|
-
end
|
459
|
-
end
|
460
|
-
|
461
|
-
sig { params(program: T.untyped).returns(T.nilable(T.class_of(DSPy::Signature))) }
|
462
|
-
def extract_signature_class(program)
|
463
|
-
if program.respond_to?(:signature_class)
|
464
|
-
program.signature_class
|
465
|
-
else
|
466
|
-
nil
|
467
|
-
end
|
468
|
-
end
|
469
|
-
|
470
|
-
sig { params(program: T.untyped).returns(T::Boolean) }
|
471
|
-
def respond_to_instruction_modification?(program)
|
472
|
-
program.respond_to?(:with_instruction)
|
473
|
-
end
|
474
|
-
|
475
|
-
sig { params(program: T.untyped).returns(T::Boolean) }
|
476
|
-
def respond_to_few_shot_modification?(program)
|
477
|
-
program.respond_to?(:with_examples)
|
478
|
-
end
|
479
|
-
|
480
|
-
sig { params(example: T.untyped).returns(T.nilable(String)) }
|
481
|
-
def extract_reasoning_from_example(example)
|
482
|
-
case example
|
483
|
-
when DSPy::Example
|
484
|
-
if example.expected_values.key?(:reasoning)
|
485
|
-
example.expected_values[:reasoning]
|
486
|
-
elsif example.expected_values.key?(:explanation)
|
487
|
-
example.expected_values[:explanation]
|
488
|
-
else
|
489
|
-
nil
|
490
|
-
end
|
491
|
-
else
|
492
|
-
nil
|
493
|
-
end
|
494
|
-
end
|
495
|
-
end
|
496
|
-
end
|
497
|
-
end
|