dspy 0.29.1 → 0.30.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +45 -0
- data/README.md +159 -95
- data/lib/dspy/callbacks.rb +93 -19
- data/lib/dspy/context.rb +101 -5
- data/lib/dspy/errors.rb +19 -1
- data/lib/dspy/{datasets.rb → evals/version.rb} +2 -3
- data/lib/dspy/{evaluate.rb → evals.rb} +373 -110
- data/lib/dspy/mixins/instruction_updatable.rb +22 -0
- data/lib/dspy/module.rb +213 -17
- data/lib/dspy/observability.rb +40 -182
- data/lib/dspy/predict.rb +10 -2
- data/lib/dspy/propose/dataset_summary_generator.rb +28 -18
- data/lib/dspy/re_act.rb +21 -0
- data/lib/dspy/schema/sorbet_json_schema.rb +302 -0
- data/lib/dspy/schema/version.rb +7 -0
- data/lib/dspy/schema.rb +4 -0
- data/lib/dspy/structured_outputs_prompt.rb +48 -0
- data/lib/dspy/support/warning_filters.rb +27 -0
- data/lib/dspy/teleprompt/gepa.rb +9 -588
- data/lib/dspy/teleprompt/instruction_updates.rb +94 -0
- data/lib/dspy/teleprompt/teleprompter.rb +6 -6
- data/lib/dspy/teleprompt/utils.rb +5 -65
- data/lib/dspy/type_system/sorbet_json_schema.rb +2 -299
- data/lib/dspy/version.rb +1 -1
- data/lib/dspy.rb +39 -7
- metadata +18 -61
- data/lib/dspy/code_act.rb +0 -477
- data/lib/dspy/datasets/ade.rb +0 -90
- data/lib/dspy/observability/async_span_processor.rb +0 -250
- data/lib/dspy/observability/observation_type.rb +0 -65
- data/lib/dspy/optimizers/gaussian_process.rb +0 -141
- data/lib/dspy/teleprompt/mipro_v2.rb +0 -1672
- data/lib/gepa/api.rb +0 -61
- data/lib/gepa/core/engine.rb +0 -226
- data/lib/gepa/core/evaluation_batch.rb +0 -26
- data/lib/gepa/core/result.rb +0 -92
- data/lib/gepa/core/state.rb +0 -231
- data/lib/gepa/logging/experiment_tracker.rb +0 -54
- data/lib/gepa/logging/logger.rb +0 -57
- data/lib/gepa/logging.rb +0 -9
- data/lib/gepa/proposer/base.rb +0 -27
- data/lib/gepa/proposer/merge_proposer.rb +0 -424
- data/lib/gepa/proposer/reflective_mutation/base.rb +0 -48
- data/lib/gepa/proposer/reflective_mutation/reflective_mutation.rb +0 -188
- data/lib/gepa/strategies/batch_sampler.rb +0 -91
- data/lib/gepa/strategies/candidate_selector.rb +0 -97
- data/lib/gepa/strategies/component_selector.rb +0 -57
- data/lib/gepa/strategies/instruction_proposal.rb +0 -120
- data/lib/gepa/telemetry.rb +0 -122
- data/lib/gepa/utils/pareto.rb +0 -119
- data/lib/gepa.rb +0 -21
data/lib/dspy/teleprompt/gepa.rb
CHANGED
|
@@ -1,591 +1,12 @@
|
|
|
1
|
+
# typed: false
|
|
1
2
|
# frozen_string_literal: true
|
|
2
3
|
|
|
3
|
-
|
|
4
|
-
require '
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
module Teleprompt
|
|
12
|
-
class GEPA < Teleprompter
|
|
13
|
-
extend T::Sig
|
|
14
|
-
DEFAULT_CONFIG = {
|
|
15
|
-
max_metric_calls: 32,
|
|
16
|
-
minibatch_size: 2,
|
|
17
|
-
perfect_score: 1.0,
|
|
18
|
-
skip_perfect_score: true,
|
|
19
|
-
use_merge: true,
|
|
20
|
-
max_merge_invocations: 5
|
|
21
|
-
}.freeze
|
|
22
|
-
|
|
23
|
-
def self.configure
|
|
24
|
-
yield(default_config) if block_given?
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
def self.default_config
|
|
28
|
-
@default_config ||= DEFAULT_CONFIG.dup
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
class NullExperimentTracker
|
|
32
|
-
extend T::Sig
|
|
33
|
-
attr_reader :events
|
|
34
|
-
|
|
35
|
-
def initialize
|
|
36
|
-
@events = []
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
sig { params(metrics: T::Hash[Symbol, T.untyped], step: T.nilable(Integer)).void }
|
|
40
|
-
def log_metrics(metrics, step: nil)
|
|
41
|
-
@events << { metrics: metrics, step: step }
|
|
42
|
-
end
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
class NullLogger
|
|
46
|
-
extend T::Sig
|
|
47
|
-
attr_reader :messages
|
|
48
|
-
|
|
49
|
-
def initialize
|
|
50
|
-
@messages = []
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
sig { params(message: String).void }
|
|
54
|
-
def log(message)
|
|
55
|
-
@messages << message
|
|
56
|
-
DSPy.log('gepa.log', message: message)
|
|
57
|
-
end
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
class PredictAdapter
|
|
61
|
-
extend T::Sig
|
|
62
|
-
|
|
63
|
-
ReflectionLMType = T.type_alias do
|
|
64
|
-
T.any(DSPy::ReflectionLM, T.proc.params(arg0: String).returns(String))
|
|
65
|
-
end
|
|
66
|
-
|
|
67
|
-
FeedbackFnType = T.type_alias do
|
|
68
|
-
T.proc.params(
|
|
69
|
-
predictor_output: T.untyped,
|
|
70
|
-
predictor_inputs: T::Hash[T.any(String, Symbol), T.untyped],
|
|
71
|
-
module_inputs: DSPy::Example,
|
|
72
|
-
module_outputs: T.untyped,
|
|
73
|
-
captured_trace: T::Array[T::Hash[Symbol, T.untyped]]
|
|
74
|
-
).returns(T.untyped)
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
sig do
|
|
78
|
-
params(
|
|
79
|
-
student: DSPy::Module,
|
|
80
|
-
metric: T.proc.params(arg0: DSPy::Example, arg1: T.untyped).returns(T.untyped),
|
|
81
|
-
reflection_lm: T.nilable(ReflectionLMType),
|
|
82
|
-
feedback_map: T::Hash[String, FeedbackFnType]
|
|
83
|
-
).void
|
|
84
|
-
end
|
|
85
|
-
def initialize(student, metric, reflection_lm: nil, feedback_map: {})
|
|
86
|
-
@student = student
|
|
87
|
-
@metric = metric
|
|
88
|
-
@reflection_lm = reflection_lm
|
|
89
|
-
@feedback_map = feedback_map.transform_keys(&:to_s)
|
|
90
|
-
|
|
91
|
-
@predictor_entries = resolve_predictors(@student)
|
|
92
|
-
@predictor_names = @predictor_entries.map(&:first)
|
|
93
|
-
end
|
|
94
|
-
|
|
95
|
-
sig { returns(T::Hash[String, String]) }
|
|
96
|
-
def seed_candidate
|
|
97
|
-
@predictor_entries.each_with_object({}) do |(name, predictor), memo|
|
|
98
|
-
memo[name] = extract_instruction(predictor)
|
|
99
|
-
end
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
sig do
|
|
103
|
-
params(candidate: T::Hash[String, String], recorder: T.nilable(T.untyped)).returns(DSPy::Module)
|
|
104
|
-
end
|
|
105
|
-
def build_program(candidate, recorder: nil)
|
|
106
|
-
program = clone_module(@student)
|
|
107
|
-
duplicate_predictors!(program)
|
|
108
|
-
|
|
109
|
-
predictor_map = resolve_predictors(program).to_h
|
|
110
|
-
candidate.each do |name, new_instruction|
|
|
111
|
-
predictor = predictor_map[name]
|
|
112
|
-
next unless predictor
|
|
113
|
-
|
|
114
|
-
updated = apply_instruction_to_predictor(predictor, new_instruction)
|
|
115
|
-
if predictor.equal?(program)
|
|
116
|
-
program = updated
|
|
117
|
-
elsif !updated.equal?(predictor)
|
|
118
|
-
replace_reference(program, predictor, updated)
|
|
119
|
-
end
|
|
120
|
-
predictor_map[name] = updated
|
|
121
|
-
end
|
|
122
|
-
|
|
123
|
-
wrap_predictors_for_tracing!(program, recorder: recorder) if recorder
|
|
124
|
-
program
|
|
125
|
-
end
|
|
126
|
-
|
|
127
|
-
sig do
|
|
128
|
-
params(
|
|
129
|
-
batch: T::Array[DSPy::Example],
|
|
130
|
-
candidate: T::Hash[String, String],
|
|
131
|
-
capture_traces: T::Boolean
|
|
132
|
-
).returns(::GEPA::Core::EvaluationBatch)
|
|
133
|
-
end
|
|
134
|
-
def evaluate(batch, candidate, capture_traces: false)
|
|
135
|
-
recorder = capture_traces ? TraceRecorder.new : nil
|
|
136
|
-
program = build_program(candidate, recorder: recorder)
|
|
137
|
-
|
|
138
|
-
if capture_traces
|
|
139
|
-
trajectories = batch.map do |example|
|
|
140
|
-
recorder&.start_example
|
|
141
|
-
prediction = program.call(**example.input_values)
|
|
142
|
-
result = @metric.call(example, prediction)
|
|
143
|
-
score, feedback = extract_score_and_feedback(result)
|
|
144
|
-
trace_entries = recorder ? recorder.finish_example : []
|
|
145
|
-
|
|
146
|
-
{
|
|
147
|
-
example: example,
|
|
148
|
-
prediction: prediction,
|
|
149
|
-
score: score,
|
|
150
|
-
feedback: feedback,
|
|
151
|
-
trace: trace_entries
|
|
152
|
-
}
|
|
153
|
-
end
|
|
154
|
-
|
|
155
|
-
scores = trajectories.map { |row| row[:score] }
|
|
156
|
-
outputs = trajectories.map { |row| row[:prediction] }
|
|
157
|
-
::GEPA::Core::EvaluationBatch.new(outputs: outputs, scores: scores, trajectories: trajectories)
|
|
158
|
-
else
|
|
159
|
-
evaluator = DSPy::Evaluate.new(program, metric: nil, num_threads: nil, max_errors: batch.length * 100, provide_traceback: false)
|
|
160
|
-
results = batch.map do |example|
|
|
161
|
-
prediction = program.call(**example.input_values)
|
|
162
|
-
result = @metric.call(example, prediction)
|
|
163
|
-
score, = extract_score_and_feedback(result)
|
|
164
|
-
[prediction, score]
|
|
165
|
-
end
|
|
166
|
-
outputs = results.map(&:first)
|
|
167
|
-
scores = results.map(&:last)
|
|
168
|
-
::GEPA::Core::EvaluationBatch.new(outputs: outputs, scores: scores, trajectories: nil)
|
|
169
|
-
end
|
|
170
|
-
end
|
|
171
|
-
|
|
172
|
-
sig do
|
|
173
|
-
params(
|
|
174
|
-
candidate: T::Hash[String, String],
|
|
175
|
-
eval_batch: ::GEPA::Core::EvaluationBatch,
|
|
176
|
-
components_to_update: T::Array[String]
|
|
177
|
-
).returns(T::Hash[String, T::Array[T::Hash[String, T.untyped]]])
|
|
178
|
-
end
|
|
179
|
-
def make_reflective_dataset(candidate, eval_batch, components_to_update)
|
|
180
|
-
return {} unless eval_batch.trajectories
|
|
181
|
-
|
|
182
|
-
components_to_update.each_with_object({}) do |component, memo|
|
|
183
|
-
rows = eval_batch.trajectories.flat_map do |trajectory|
|
|
184
|
-
example = trajectory[:example]
|
|
185
|
-
expected = serialize_struct(example.expected)
|
|
186
|
-
actual_program_output = serialize_prediction(trajectory[:prediction])
|
|
187
|
-
diff = build_diff(expected, actual_program_output)
|
|
188
|
-
default_feedback = trajectory[:feedback] || "Score: #{trajectory[:score]}"
|
|
189
|
-
default_score = trajectory[:score]
|
|
190
|
-
full_trace = Array(trajectory[:trace])
|
|
191
|
-
|
|
192
|
-
full_trace.filter_map do |entry|
|
|
193
|
-
next unless entry[:predictor_name] == component
|
|
194
|
-
|
|
195
|
-
raw_inputs = entry[:inputs] || {}
|
|
196
|
-
raw_output = entry[:output]
|
|
197
|
-
inputs = serialize_struct(raw_inputs)
|
|
198
|
-
outputs = serialize_prediction(raw_output)
|
|
199
|
-
|
|
200
|
-
feedback_text = default_feedback
|
|
201
|
-
score_value = default_score
|
|
202
|
-
score_overridden = false
|
|
203
|
-
|
|
204
|
-
if (feedback_fn = @feedback_map[component])
|
|
205
|
-
feedback_result = feedback_fn.call(
|
|
206
|
-
predictor_output: raw_output,
|
|
207
|
-
predictor_inputs: raw_inputs,
|
|
208
|
-
module_inputs: example,
|
|
209
|
-
module_outputs: trajectory[:prediction],
|
|
210
|
-
captured_trace: full_trace
|
|
211
|
-
)
|
|
212
|
-
override_score, override_feedback = extract_score_and_feedback(feedback_result)
|
|
213
|
-
feedback_text = override_feedback if override_feedback
|
|
214
|
-
unless override_score.nil?
|
|
215
|
-
score_value = override_score
|
|
216
|
-
score_overridden = true
|
|
217
|
-
end
|
|
218
|
-
end
|
|
219
|
-
|
|
220
|
-
row = {
|
|
221
|
-
'Inputs' => inputs,
|
|
222
|
-
'Expected' => expected,
|
|
223
|
-
'Generated Outputs' => outputs,
|
|
224
|
-
'Diff' => diff,
|
|
225
|
-
'Feedback' => feedback_text
|
|
226
|
-
}
|
|
227
|
-
row['Score'] = score_value if score_overridden
|
|
228
|
-
row
|
|
229
|
-
end
|
|
230
|
-
end
|
|
231
|
-
memo[component] = rows unless rows.empty?
|
|
232
|
-
end
|
|
233
|
-
end
|
|
234
|
-
|
|
235
|
-
sig do
|
|
236
|
-
params(
|
|
237
|
-
candidate: T::Hash[String, String],
|
|
238
|
-
reflective_dataset: T::Hash[String, T::Array[T::Hash[String, T.untyped]]],
|
|
239
|
-
components_to_update: T::Array[String]
|
|
240
|
-
).returns(T::Hash[String, String])
|
|
241
|
-
end
|
|
242
|
-
def propose_new_texts(candidate, reflective_dataset, components_to_update)
|
|
243
|
-
if @reflection_lm
|
|
244
|
-
components_to_update.to_h do |name|
|
|
245
|
-
response = ::GEPA::Strategies::InstructionProposalSignature.run(
|
|
246
|
-
@reflection_lm,
|
|
247
|
-
{
|
|
248
|
-
'current_instruction_doc' => candidate[name],
|
|
249
|
-
'dataset_with_feedback' => reflective_dataset.fetch(name, [])
|
|
250
|
-
}
|
|
251
|
-
)
|
|
252
|
-
[name, response.fetch('new_instruction')]
|
|
253
|
-
end
|
|
254
|
-
else
|
|
255
|
-
components_to_update.to_h do |name|
|
|
256
|
-
[name, "#{candidate[name]} improved"]
|
|
257
|
-
end
|
|
258
|
-
end
|
|
259
|
-
end
|
|
260
|
-
|
|
261
|
-
private
|
|
262
|
-
|
|
263
|
-
sig { params(program: DSPy::Module).returns(T::Array[[String, DSPy::Module]]) }
|
|
264
|
-
def resolve_predictors(program)
|
|
265
|
-
pairs = program.named_predictors
|
|
266
|
-
pairs = [['self', program]] if pairs.empty?
|
|
267
|
-
pairs
|
|
268
|
-
end
|
|
269
|
-
|
|
270
|
-
sig { params(mod: DSPy::Module).returns(DSPy::Module) }
|
|
271
|
-
def clone_module(mod)
|
|
272
|
-
safe_clone(mod)
|
|
273
|
-
end
|
|
274
|
-
|
|
275
|
-
sig { params(program: DSPy::Module).void }
|
|
276
|
-
def duplicate_predictors!(program)
|
|
277
|
-
resolve_predictors(program).each do |name, predictor|
|
|
278
|
-
next unless @predictor_names.include?(name)
|
|
279
|
-
next if predictor.equal?(program)
|
|
280
|
-
clone = safe_clone(predictor)
|
|
281
|
-
replace_reference(program, predictor, clone)
|
|
282
|
-
end
|
|
283
|
-
end
|
|
284
|
-
|
|
285
|
-
sig do
|
|
286
|
-
params(container: T.untyped, target: T.untyped, replacement: T.untyped, visited: T::Set[Integer]).returns(T.untyped)
|
|
287
|
-
end
|
|
288
|
-
def replace_in_object(container, target, replacement, visited)
|
|
289
|
-
return replacement if container.equal?(target)
|
|
290
|
-
return container if visited.include?(container.object_id)
|
|
291
|
-
|
|
292
|
-
visited.add(container.object_id)
|
|
293
|
-
|
|
294
|
-
case container
|
|
295
|
-
when Array
|
|
296
|
-
modified = false
|
|
297
|
-
new_array = container.map do |value|
|
|
298
|
-
new_value = replace_in_object(value, target, replacement, visited)
|
|
299
|
-
modified ||= !new_value.equal?(value)
|
|
300
|
-
new_value
|
|
301
|
-
end
|
|
302
|
-
modified ? new_array : container
|
|
303
|
-
when Hash
|
|
304
|
-
modified = false
|
|
305
|
-
new_hash = container.each_with_object({}) do |(key, value), memo|
|
|
306
|
-
new_value = replace_in_object(value, target, replacement, visited)
|
|
307
|
-
modified ||= !new_value.equal?(value)
|
|
308
|
-
memo[key] = new_value
|
|
309
|
-
end
|
|
310
|
-
modified ? new_hash : container
|
|
311
|
-
else
|
|
312
|
-
container
|
|
313
|
-
end
|
|
314
|
-
end
|
|
315
|
-
|
|
316
|
-
sig { params(owner: T.untyped, target: T.untyped, replacement: T.untyped).void }
|
|
317
|
-
def replace_reference(owner, target, replacement)
|
|
318
|
-
return if owner.equal?(target)
|
|
319
|
-
|
|
320
|
-
Array(owner.instance_variables).each do |ivar|
|
|
321
|
-
value = owner.instance_variable_get(ivar)
|
|
322
|
-
next if value.nil?
|
|
323
|
-
|
|
324
|
-
new_value = replace_in_object(value, target, replacement, ::Set.new)
|
|
325
|
-
unless new_value.equal?(value)
|
|
326
|
-
owner.instance_variable_set(ivar, new_value)
|
|
327
|
-
end
|
|
328
|
-
end
|
|
329
|
-
end
|
|
330
|
-
|
|
331
|
-
sig { params(program: DSPy::Module, recorder: T.nilable(T.untyped)).void }
|
|
332
|
-
def wrap_predictors_for_tracing!(program, recorder: nil)
|
|
333
|
-
return unless recorder
|
|
334
|
-
|
|
335
|
-
resolve_predictors(program).each do |name, predictor|
|
|
336
|
-
wrap_predictor_for_tracing(program, predictor, name, recorder)
|
|
337
|
-
end
|
|
338
|
-
end
|
|
339
|
-
|
|
340
|
-
sig { params(program: DSPy::Module, predictor: DSPy::Module, name: String, recorder: T.untyped).void }
|
|
341
|
-
def wrap_predictor_for_tracing(program, predictor, name, recorder)
|
|
342
|
-
original_forward = predictor.method(:forward_untyped)
|
|
343
|
-
recorder_ref = recorder
|
|
344
|
-
predictor_name = name
|
|
345
|
-
|
|
346
|
-
predictor.define_singleton_method(:forward_untyped) do |**input_values|
|
|
347
|
-
result = original_forward.call(**input_values)
|
|
348
|
-
recorder_ref.record(
|
|
349
|
-
predictor_name: predictor_name,
|
|
350
|
-
inputs: input_values.dup,
|
|
351
|
-
output: result
|
|
352
|
-
)
|
|
353
|
-
result
|
|
354
|
-
end
|
|
355
|
-
end
|
|
356
|
-
|
|
357
|
-
sig { params(predictor: DSPy::Module, instruction: String).returns(DSPy::Module) }
|
|
358
|
-
def apply_instruction_to_predictor(predictor, instruction)
|
|
359
|
-
if predictor.respond_to?(:with_instruction)
|
|
360
|
-
predictor.with_instruction(instruction)
|
|
361
|
-
elsif predictor.respond_to?(:prompt) && predictor.prompt.respond_to?(:with_instruction)
|
|
362
|
-
predictor.with_prompt(predictor.prompt.with_instruction(instruction))
|
|
363
|
-
else
|
|
364
|
-
duplicate = safe_clone(predictor)
|
|
365
|
-
signature = DSPy::Teleprompt::Utils.get_signature(duplicate)
|
|
366
|
-
updated_signature = signature.with_instructions(instruction)
|
|
367
|
-
DSPy::Teleprompt::Utils.set_signature(duplicate, updated_signature)
|
|
368
|
-
duplicate
|
|
369
|
-
end
|
|
370
|
-
end
|
|
371
|
-
|
|
372
|
-
sig { params(object: T.untyped).returns(T.untyped) }
|
|
373
|
-
def safe_clone(object)
|
|
374
|
-
object.clone
|
|
375
|
-
rescue TypeError
|
|
376
|
-
object.dup
|
|
377
|
-
end
|
|
378
|
-
|
|
379
|
-
class TraceRecorder
|
|
380
|
-
def initialize
|
|
381
|
-
@current_trace = nil
|
|
382
|
-
end
|
|
383
|
-
|
|
384
|
-
def start_example
|
|
385
|
-
@current_trace = []
|
|
386
|
-
end
|
|
387
|
-
|
|
388
|
-
def record(entry)
|
|
389
|
-
return unless @current_trace
|
|
390
|
-
@current_trace << entry
|
|
391
|
-
end
|
|
392
|
-
|
|
393
|
-
def finish_example
|
|
394
|
-
trace = @current_trace || []
|
|
395
|
-
@current_trace = nil
|
|
396
|
-
trace
|
|
397
|
-
end
|
|
398
|
-
end
|
|
399
|
-
|
|
400
|
-
sig { params(program: DSPy::Module).returns(String) }
|
|
401
|
-
def extract_instruction(program)
|
|
402
|
-
if program.respond_to?(:prompt) && program.prompt.respond_to?(:instruction)
|
|
403
|
-
program.prompt.instruction
|
|
404
|
-
elsif program.respond_to?(:instruction)
|
|
405
|
-
program.instruction
|
|
406
|
-
else
|
|
407
|
-
raise ArgumentError, "Program must expose prompt.instruction or #instruction"
|
|
408
|
-
end
|
|
409
|
-
end
|
|
410
|
-
|
|
411
|
-
sig { params(struct: T.untyped).returns(T::Hash[Symbol, T.untyped]) }
|
|
412
|
-
def serialize_struct(struct)
|
|
413
|
-
if struct.respond_to?(:to_h)
|
|
414
|
-
struct.to_h
|
|
415
|
-
elsif struct.instance_variables.any?
|
|
416
|
-
struct.instance_variables.each_with_object({}) do |ivar, memo|
|
|
417
|
-
key = ivar.to_s.delete_prefix('@').to_sym
|
|
418
|
-
memo[key] = struct.instance_variable_get(ivar)
|
|
419
|
-
end
|
|
420
|
-
else
|
|
421
|
-
{}
|
|
422
|
-
end
|
|
423
|
-
end
|
|
424
|
-
|
|
425
|
-
sig { params(prediction: T.untyped).returns(T::Hash[Symbol, T.untyped]) }
|
|
426
|
-
def serialize_prediction(prediction)
|
|
427
|
-
case prediction
|
|
428
|
-
when DSPy::Prediction
|
|
429
|
-
prediction.to_h
|
|
430
|
-
when Hash
|
|
431
|
-
prediction
|
|
432
|
-
else
|
|
433
|
-
serialize_struct(prediction)
|
|
434
|
-
end
|
|
435
|
-
end
|
|
436
|
-
|
|
437
|
-
sig { params(expected: T::Hash[Symbol, T.untyped], actual: T::Hash[Symbol, T.untyped]).returns(T::Hash[Symbol, T.untyped]) }
|
|
438
|
-
def build_diff(expected, actual)
|
|
439
|
-
keys = expected.keys | actual.keys
|
|
440
|
-
keys.each_with_object({}) do |key, memo|
|
|
441
|
-
exp = expected[key]
|
|
442
|
-
act = actual[key]
|
|
443
|
-
next if exp == act
|
|
444
|
-
|
|
445
|
-
memo[key] = { expected: exp, actual: act }
|
|
446
|
-
end
|
|
447
|
-
end
|
|
448
|
-
|
|
449
|
-
sig { params(result: T.untyped).returns([Float, T.nilable(String)]) }
|
|
450
|
-
def extract_score_and_feedback(result)
|
|
451
|
-
case result
|
|
452
|
-
when DSPy::Prediction
|
|
453
|
-
score = result.respond_to?(:score) ? result.score : 0.0
|
|
454
|
-
feedback = result.respond_to?(:feedback) ? result.feedback : nil
|
|
455
|
-
[score.to_f, feedback]
|
|
456
|
-
when Hash
|
|
457
|
-
[result[:score].to_f, result[:feedback]]
|
|
458
|
-
else
|
|
459
|
-
[result.to_f, nil]
|
|
460
|
-
end
|
|
461
|
-
end
|
|
462
|
-
end
|
|
463
|
-
|
|
464
|
-
sig do
|
|
465
|
-
params(
|
|
466
|
-
metric: T.proc.params(arg0: DSPy::Example, arg1: T.untyped).returns(T.untyped),
|
|
467
|
-
reflection_lm: T.nilable(T.untyped),
|
|
468
|
-
feedback_map: T.nilable(T::Hash[String, PredictAdapter::FeedbackFnType]),
|
|
469
|
-
adapter_builder: T.nilable(T.proc.returns(T.untyped)),
|
|
470
|
-
config: T.nilable(T::Hash[Symbol, T.untyped])
|
|
471
|
-
).void
|
|
472
|
-
end
|
|
473
|
-
def initialize(metric:, reflection_lm: nil, feedback_map: nil, adapter_builder: nil, config: nil)
|
|
474
|
-
super(metric: metric)
|
|
475
|
-
@metric = metric
|
|
476
|
-
@reflection_lm = reflection_lm
|
|
477
|
-
@feedback_map = (feedback_map || {}).transform_keys(&:to_s)
|
|
478
|
-
@adapter_builder = adapter_builder || method(:build_adapter)
|
|
479
|
-
@gepa_config = self.class.default_config.merge(config || {})
|
|
480
|
-
end
|
|
481
|
-
|
|
482
|
-
sig do
|
|
483
|
-
override.params(
|
|
484
|
-
program: DSPy::Module,
|
|
485
|
-
trainset: T::Array[T.untyped],
|
|
486
|
-
valset: T.nilable(T::Array[T.untyped])
|
|
487
|
-
).returns(OptimizationResult)
|
|
488
|
-
end
|
|
489
|
-
def compile(program, trainset:, valset: nil)
|
|
490
|
-
validate_inputs(program, trainset, valset)
|
|
491
|
-
|
|
492
|
-
typed_trainset = ensure_typed_examples(trainset)
|
|
493
|
-
typed_valset = valset ? ensure_typed_examples(valset) : typed_trainset
|
|
494
|
-
|
|
495
|
-
adapter = @adapter_builder.call(
|
|
496
|
-
program,
|
|
497
|
-
@metric,
|
|
498
|
-
reflection_lm: @reflection_lm,
|
|
499
|
-
feedback_map: @feedback_map
|
|
500
|
-
)
|
|
501
|
-
seed_candidate = adapter.seed_candidate
|
|
502
|
-
|
|
503
|
-
cand_selector = ::GEPA::Strategies::ParetoCandidateSelector.new
|
|
504
|
-
comp_selector = ::GEPA::Strategies::RoundRobinReflectionComponentSelector.new
|
|
505
|
-
batch_sampler = ::GEPA::Strategies::EpochShuffledBatchSampler.new([@gepa_config[:minibatch_size], typed_trainset.size].min)
|
|
506
|
-
|
|
507
|
-
telemetry_context = ::GEPA::Telemetry.build_context
|
|
508
|
-
|
|
509
|
-
logger = ::GEPA::Logging::BufferingLogger.new
|
|
510
|
-
tracker = ::GEPA::Logging::ExperimentTracker.new
|
|
511
|
-
|
|
512
|
-
reflective = ::GEPA::Proposer::ReflectiveMutationProposer.new(
|
|
513
|
-
logger: logger,
|
|
514
|
-
trainset: typed_trainset,
|
|
515
|
-
adapter: adapter,
|
|
516
|
-
candidate_selector: cand_selector,
|
|
517
|
-
module_selector: comp_selector,
|
|
518
|
-
batch_sampler: batch_sampler,
|
|
519
|
-
perfect_score: @gepa_config[:perfect_score],
|
|
520
|
-
skip_perfect_score: @gepa_config[:skip_perfect_score],
|
|
521
|
-
experiment_tracker: tracker,
|
|
522
|
-
reflection_lm: nil,
|
|
523
|
-
telemetry: telemetry_context
|
|
524
|
-
)
|
|
525
|
-
|
|
526
|
-
evaluator = lambda do |dataset, candidate|
|
|
527
|
-
batch = adapter.evaluate(dataset, candidate, capture_traces: false)
|
|
528
|
-
[batch.outputs, batch.scores]
|
|
529
|
-
end
|
|
530
|
-
|
|
531
|
-
merge_proposer = nil
|
|
532
|
-
if @gepa_config[:use_merge]
|
|
533
|
-
merge_proposer = ::GEPA::Proposer::MergeProposer.new(
|
|
534
|
-
logger: logger,
|
|
535
|
-
valset: typed_valset,
|
|
536
|
-
evaluator: evaluator,
|
|
537
|
-
use_merge: true,
|
|
538
|
-
max_merge_invocations: @gepa_config[:max_merge_invocations],
|
|
539
|
-
rng: Random.new(0),
|
|
540
|
-
telemetry: telemetry_context
|
|
541
|
-
)
|
|
542
|
-
end
|
|
543
|
-
|
|
544
|
-
engine = ::GEPA::Core::Engine.new(
|
|
545
|
-
evaluator: evaluator,
|
|
546
|
-
valset: typed_valset,
|
|
547
|
-
seed_candidate: seed_candidate,
|
|
548
|
-
max_metric_calls: @gepa_config[:max_metric_calls],
|
|
549
|
-
perfect_score: @gepa_config[:perfect_score],
|
|
550
|
-
seed: 0,
|
|
551
|
-
reflective_proposer: reflective,
|
|
552
|
-
logger: logger,
|
|
553
|
-
experiment_tracker: tracker,
|
|
554
|
-
merge_proposer: merge_proposer,
|
|
555
|
-
run_dir: nil,
|
|
556
|
-
track_best_outputs: false,
|
|
557
|
-
display_progress_bar: false,
|
|
558
|
-
telemetry: telemetry_context,
|
|
559
|
-
raise_on_exception: true
|
|
560
|
-
)
|
|
561
|
-
|
|
562
|
-
state = engine.run
|
|
563
|
-
result = ::GEPA::Core::Result.from_state(state)
|
|
564
|
-
best_program = adapter.build_program(result.best_candidate)
|
|
565
|
-
|
|
566
|
-
OptimizationResult.new(
|
|
567
|
-
optimized_program: best_program,
|
|
568
|
-
scores: { best: result.val_aggregate_scores[result.best_idx] },
|
|
569
|
-
history: { total_candidates: result.num_candidates },
|
|
570
|
-
best_score_name: 'best',
|
|
571
|
-
best_score_value: result.val_aggregate_scores[result.best_idx],
|
|
572
|
-
metadata: { candidates: result.num_candidates }
|
|
573
|
-
)
|
|
574
|
-
end
|
|
575
|
-
|
|
576
|
-
private
|
|
577
|
-
|
|
578
|
-
sig do
|
|
579
|
-
params(
|
|
580
|
-
program: DSPy::Module,
|
|
581
|
-
metric: T.proc.params(arg0: DSPy::Example, arg1: T.untyped).returns(T.untyped),
|
|
582
|
-
reflection_lm: T.nilable(T.untyped),
|
|
583
|
-
feedback_map: T::Hash[String, PredictAdapter::FeedbackFnType]
|
|
584
|
-
).returns(PredictAdapter)
|
|
585
|
-
end
|
|
586
|
-
def build_adapter(program, metric, reflection_lm: nil, feedback_map: {})
|
|
587
|
-
PredictAdapter.new(program, metric, reflection_lm: reflection_lm, feedback_map: feedback_map)
|
|
588
|
-
end
|
|
589
|
-
end
|
|
590
|
-
end
|
|
4
|
+
begin
|
|
5
|
+
require 'dspy/gepa'
|
|
6
|
+
rescue LoadError => e
|
|
7
|
+
raise LoadError, <<~MSG
|
|
8
|
+
DSPy::Teleprompt::GEPA has moved to the optional 'dspy-gepa' gem.
|
|
9
|
+
Add `gem 'dspy-gepa'` (and set DSPY_WITH_GEPA=1 when bundling from the monorepo) to use GEPA.
|
|
10
|
+
Original error: #{e.message}
|
|
11
|
+
MSG
|
|
591
12
|
end
|
|
data/lib/dspy/teleprompt/instruction_updates.rb
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'set'
|
|
4
|
+
require 'sorbet-runtime'
|
|
5
|
+
require_relative '../errors'
|
|
6
|
+
|
|
7
|
+
module DSPy
|
|
8
|
+
module Teleprompt
|
|
9
|
+
module InstructionUpdates
|
|
10
|
+
extend T::Sig
|
|
11
|
+
|
|
12
|
+
module_function
|
|
13
|
+
|
|
14
|
+
sig { params(predictor: T.untyped).void }
|
|
15
|
+
def ensure_instruction_capability!(predictor)
|
|
16
|
+
return if predictor.respond_to?(:with_instruction)
|
|
17
|
+
raise DSPy::InstructionUpdateError.missing_instruction_capability(predictor.class)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
sig { params(predictor: T.untyped).void }
|
|
21
|
+
def ensure_examples_capability!(predictor)
|
|
22
|
+
return if predictor.respond_to?(:with_examples)
|
|
23
|
+
raise DSPy::InstructionUpdateError.missing_examples_capability(predictor.class)
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
sig { params(owner: T.untyped, predictor: T.untyped, instruction: String).returns([T.untyped, T.untyped]) }
|
|
27
|
+
def apply_instruction(owner, predictor, instruction)
|
|
28
|
+
ensure_instruction_capability!(predictor)
|
|
29
|
+
updated = predictor.with_instruction(instruction)
|
|
30
|
+
[replace_reference(owner, predictor, updated), updated]
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
sig { params(owner: T.untyped, predictor: T.untyped, examples: T::Array[T.untyped]).returns([T.untyped, T.untyped]) }
|
|
34
|
+
def apply_examples(owner, predictor, examples)
|
|
35
|
+
ensure_examples_capability!(predictor)
|
|
36
|
+
updated = predictor.with_examples(examples)
|
|
37
|
+
[replace_reference(owner, predictor, updated), updated]
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
sig { params(owner: T.untyped, target: T.untyped, replacement: T.untyped).returns(T.untyped) }
|
|
41
|
+
def replace_reference(owner, target, replacement)
|
|
42
|
+
return replacement if owner.equal?(target)
|
|
43
|
+
|
|
44
|
+
Array(owner.instance_variables).each do |ivar|
|
|
45
|
+
value = owner.instance_variable_get(ivar)
|
|
46
|
+
next if value.nil?
|
|
47
|
+
|
|
48
|
+
new_value = replace_in_object(value, target, replacement, ::Set.new)
|
|
49
|
+
unless new_value.equal?(value)
|
|
50
|
+
owner.instance_variable_set(ivar, new_value)
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
owner
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
sig do
|
|
58
|
+
params(
|
|
59
|
+
container: T.untyped,
|
|
60
|
+
target: T.untyped,
|
|
61
|
+
replacement: T.untyped,
|
|
62
|
+
visited: ::Set[Integer]
|
|
63
|
+
).returns(T.untyped)
|
|
64
|
+
end
|
|
65
|
+
def replace_in_object(container, target, replacement, visited)
|
|
66
|
+
return replacement if container.equal?(target)
|
|
67
|
+
return container if visited.include?(container.object_id)
|
|
68
|
+
|
|
69
|
+
visited.add(container.object_id)
|
|
70
|
+
|
|
71
|
+
case container
|
|
72
|
+
when Array
|
|
73
|
+
modified = false
|
|
74
|
+
new_array = container.map do |value|
|
|
75
|
+
new_value = replace_in_object(value, target, replacement, visited)
|
|
76
|
+
modified ||= !new_value.equal?(value)
|
|
77
|
+
new_value
|
|
78
|
+
end
|
|
79
|
+
modified ? new_array : container
|
|
80
|
+
when Hash
|
|
81
|
+
modified = false
|
|
82
|
+
new_hash = container.each_with_object({}) do |(key, value), memo|
|
|
83
|
+
new_value = replace_in_object(value, target, replacement, visited)
|
|
84
|
+
modified ||= !new_value.equal?(value)
|
|
85
|
+
memo[key] = new_value
|
|
86
|
+
end
|
|
87
|
+
modified ? new_hash : container
|
|
88
|
+
else
|
|
89
|
+
container
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|