dspy 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +69 -382
- data/lib/dspy/chain_of_thought.rb +57 -0
- data/lib/dspy/evaluate.rb +554 -0
- data/lib/dspy/example.rb +203 -0
- data/lib/dspy/few_shot_example.rb +81 -0
- data/lib/dspy/instrumentation.rb +97 -8
- data/lib/dspy/lm/adapter_factory.rb +6 -8
- data/lib/dspy/lm.rb +5 -7
- data/lib/dspy/predict.rb +32 -34
- data/lib/dspy/prompt.rb +222 -0
- data/lib/dspy/propose/grounded_proposer.rb +560 -0
- data/lib/dspy/registry/registry_manager.rb +504 -0
- data/lib/dspy/registry/signature_registry.rb +725 -0
- data/lib/dspy/storage/program_storage.rb +442 -0
- data/lib/dspy/storage/storage_manager.rb +331 -0
- data/lib/dspy/subscribers/langfuse_subscriber.rb +669 -0
- data/lib/dspy/subscribers/logger_subscriber.rb +120 -0
- data/lib/dspy/subscribers/newrelic_subscriber.rb +686 -0
- data/lib/dspy/subscribers/otel_subscriber.rb +538 -0
- data/lib/dspy/teleprompt/data_handler.rb +107 -0
- data/lib/dspy/teleprompt/mipro_v2.rb +790 -0
- data/lib/dspy/teleprompt/simple_optimizer.rb +497 -0
- data/lib/dspy/teleprompt/teleprompter.rb +336 -0
- data/lib/dspy/teleprompt/utils.rb +380 -0
- data/lib/dspy/version.rb +5 -0
- data/lib/dspy.rb +16 -0
- metadata +29 -12
- data/lib/dspy/lm/adapters/ruby_llm_adapter.rb +0 -81
@@ -0,0 +1,336 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'json'
require 'time'
require 'sorbet-runtime'
require_relative '../instrumentation'
require_relative '../evaluate'
require_relative '../example'
|
7
|
+
|
8
|
+
module DSPy
|
9
|
+
module Teleprompt
|
10
|
+
# Base class for all DSPy teleprompters (optimizers)
|
11
|
+
# Defines the common interface and provides shared functionality for prompt optimization
|
12
|
+
class Teleprompter
|
13
|
+
extend T::Sig
|
14
|
+
|
15
|
+
# Mutable bag of settings consumed by teleprompters during an optimization run.
# Every setting is a plain read/write attribute; #initialize installs the defaults.
class Config
  extend T::Sig

  # Defaults to 4.
  sig { returns(T.nilable(Integer)) }
  attr_accessor :max_bootstrapped_examples

  # Defaults to 16.
  sig { returns(T.nilable(Integer)) }
  attr_accessor :max_labeled_examples

  # Defaults to 50.
  sig { returns(T.nilable(Integer)) }
  attr_accessor :num_candidate_examples

  # Forwarded to DSPy::Evaluate as its num_threads option. Defaults to 1.
  sig { returns(T.nilable(Integer)) }
  attr_accessor :num_threads

  # Forwarded to DSPy::Evaluate as its max_errors option. Defaults to 5.
  sig { returns(T.nilable(Integer)) }
  attr_accessor :max_errors

  # When true, input validation rejects a missing or empty validation set. Defaults to true.
  sig { returns(T::Boolean) }
  attr_accessor :require_validation_examples

  # Master switch for persisting optimization results. Defaults to false.
  sig { returns(T::Boolean) }
  attr_accessor :save_intermediate_results

  # Destination of the JSON dump written when saving is enabled. Defaults to nil.
  sig { returns(T.nilable(String)) }
  attr_accessor :save_path

  # Builds a configuration populated with the default values.
  sig { void }
  def initialize
    # Example budgets
    @max_bootstrapped_examples = 4
    @max_labeled_examples = 16
    @num_candidate_examples = 50

    # Evaluation settings
    @num_threads = 1
    @max_errors = 5

    # Validation and persistence behaviour
    @require_validation_examples = true
    @save_intermediate_results = false
    @save_path = nil
  end

  # Snapshot of every setting as a Hash keyed by setting name.
  # Values are read straight from the instance variables, in declaration order.
  sig { returns(T::Hash[Symbol, T.untyped]) }
  def to_h
    setting_names = %i[
      max_bootstrapped_examples
      max_labeled_examples
      num_candidate_examples
      num_threads
      max_errors
      require_validation_examples
      save_intermediate_results
      save_path
    ]

    setting_names.each_with_object({}) do |setting, snapshot|
      snapshot[setting] = instance_variable_get(:"@#{setting}")
    end
  end
end
|
69
|
+
|
70
|
+
# Result of an optimization run.
#
# Bundles the optimized program with its scores, history and metadata.
# The three hashes are stored as frozen shallow copies, so the result is
# read-only while the hashes passed in by the caller stay mutable.
class OptimizationResult
  extend T::Sig

  # The program produced by the optimizer (stored as given, not frozen)
  sig { returns(T.untyped) }
  attr_reader :optimized_program

  # Frozen hash of scores
  sig { returns(T::Hash[Symbol, T.untyped]) }
  attr_reader :scores

  # Frozen hash describing the optimization history
  sig { returns(T::Hash[Symbol, T.untyped]) }
  attr_reader :history

  sig { returns(T.nilable(String)) }
  attr_reader :best_score_name

  sig { returns(T.nilable(Float)) }
  attr_reader :best_score_value

  # Frozen hash of free-form metadata (e.g. :optimization_duration)
  sig { returns(T::Hash[Symbol, T.untyped]) }
  attr_reader :metadata

  sig do
    params(
      optimized_program: T.untyped,
      scores: T::Hash[Symbol, T.untyped],
      history: T::Hash[Symbol, T.untyped],
      best_score_name: T.nilable(String),
      best_score_value: T.nilable(Float),
      metadata: T::Hash[Symbol, T.untyped]
    ).void
  end
  def initialize(optimized_program:, scores:, history:, best_score_name: nil, best_score_value: nil, metadata: {})
    @optimized_program = optimized_program
    # Freeze shallow copies rather than the arguments themselves: freezing the
    # caller's hash in place would make later writes on the caller's side
    # raise FrozenError as a hidden side effect of building a result.
    @scores = scores.dup.freeze
    @history = history.dup.freeze
    @best_score_name = best_score_name
    @best_score_value = best_score_value
    @metadata = metadata.dup.freeze
  end

  # Hash form of the result. The optimized program itself is not included.
  sig { returns(T::Hash[Symbol, T.untyped]) }
  def to_h
    {
      scores: @scores,
      history: @history,
      best_score_name: @best_score_name,
      best_score_value: @best_score_value,
      metadata: @metadata
    }
  end
end
|
122
|
+
|
123
|
+
# Settings in effect for this teleprompter (a default Config when none was given)
sig { returns(Config) }
attr_reader :config

# Optional two-argument scoring callable supplied at construction time
sig { returns(T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T.untyped))) }
attr_reader :metric

# Evaluator built by the most recent #create_evaluator call; nil until then
sig { returns(T.nilable(DSPy::Evaluate)) }
attr_reader :evaluator

# @param metric [Proc, nil] scoring callable taking two arguments
# @param config [Config, nil] settings to use; a fresh Config is created when omitted
sig do
  params(
    metric: T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T.untyped)),
    config: T.nilable(Config)
  ).void
end
def initialize(metric: nil, config: nil)
  @metric = metric
  @config = (config or Config.new)
  @evaluator = nil # populated lazily by #create_evaluator
end
|
143
|
+
|
144
|
+
# Entry point of an optimization run. This base implementation is abstract:
# it always raises, and concrete teleprompters are expected to override it.
#
# @param program the program to optimize
# @param trainset [Array] training examples
# @param valset [Array, nil] optional validation examples
# @return [OptimizationResult]
# @raise [NotImplementedError] always, in this base class
sig do
  params(
    program: T.untyped,
    trainset: T::Array[T.untyped],
    valset: T.nilable(T::Array[T.untyped])
  ).returns(OptimizationResult)
end
def compile(program, trainset:, valset: nil)
  raise(NotImplementedError, "Subclasses must implement the compile method")
end
|
155
|
+
|
156
|
+
# Validate optimization inputs before a run.
#
# @param program the program to optimize; must not be nil
# @param trainset [Array] training examples; must not be empty
# @param valset [Array, nil] validation examples; mandatory (and non-empty)
#   when the config has require_validation_examples enabled
# @raise [ArgumentError] when the program is missing, the training set is
#   empty, a required validation set is absent, or any example is malformed
sig do
  params(
    program: T.untyped,
    trainset: T::Array[T.untyped],
    valset: T.nilable(T::Array[T.untyped])
  ).void
end
def validate_inputs(program, trainset, valset = nil)
  raise ArgumentError, "Program cannot be nil" unless program
  raise ArgumentError, "Training set cannot be empty" if trainset.empty?

  # Presence is decided by emptiness, not by element truthiness: `any?`
  # would treat a set such as [nil] as absent and skip validating it.
  valset_present = !valset.nil? && !valset.empty?

  if @config.require_validation_examples && !valset_present
    raise ArgumentError, "Validation set is required but not provided"
  end

  # Check the shape of every individual example
  validate_examples(trainset, "training")
  validate_examples(valset, "validation") if valset_present
end
|
176
|
+
|
177
|
+
# Guard that every example is a DSPy::Example.
#
# @param examples [Array] candidate examples
# @param signature_class accepted for interface compatibility; not used here
# @return [Array<DSPy::Example>] the same array, untouched, when all entries qualify
# @raise [ArgumentError] when at least one entry is not a DSPy::Example
sig { params(examples: T::Array[T.untyped], signature_class: T.nilable(T.class_of(Signature))).returns(T::Array[DSPy::Example]) }
def ensure_typed_examples(examples, signature_class = nil)
  unless examples.all? { |candidate| candidate.is_a?(DSPy::Example) }
    raise ArgumentError, "All examples must be DSPy::Example instances. Legacy format support has been removed. Please convert your examples to use the structured format with :input and :expected keys."
  end

  examples
end
|
185
|
+
|
186
|
+
# Build a DSPy::Evaluate, remember it in @evaluator, and return it.
# The metric is the one given at construction time, falling back to the
# default metric derived from the examples.
#
# @param examples [Array] examples used only to pick the default metric
# @return [DSPy::Evaluate]
sig { params(examples: T::Array[T.untyped]).returns(DSPy::Evaluate) }
def create_evaluator(examples)
  scoring = @metric || default_metric_for_examples(examples)
  evaluator_options = {
    metric: scoring,
    num_threads: @config.num_threads,
    max_errors: @config.max_errors
  }

  # The program slot is left nil here; it is expected to be supplied at evaluation time
  @evaluator = DSPy::Evaluate.new(nil, **evaluator_options)
end
|
199
|
+
|
200
|
+
# Score a program against a set of examples using a throwaway evaluator.
# Metric precedence: explicit +metric+ argument, then the teleprompter's own
# metric, then the default metric derived from the examples.
#
# @param program the program to evaluate
# @param examples [Array] examples to evaluate against
# @param metric [Proc, nil] optional per-call metric override
# @return [DSPy::Evaluate::BatchEvaluationResult]
sig do
  params(
    program: T.untyped,
    examples: T::Array[T.untyped],
    metric: T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T.untyped))
  ).returns(DSPy::Evaluate::BatchEvaluationResult)
end
def evaluate_program(program, examples, metric: nil)
  scoring = metric || @metric || default_metric_for_examples(examples)

  DSPy::Evaluate
    .new(program, metric: scoring, num_threads: @config.num_threads, max_errors: @config.max_errors)
    .evaluate(examples, display_progress: false)
end
|
220
|
+
|
221
|
+
# Save optimization results if configured.
#
# Does nothing unless the config has save_intermediate_results enabled.
# When enabled, the result is handed to three targets in order:
#   1. a JSON dump of result.to_h at config.save_path (only if a path is set),
#   2. DSPy::Storage::StorageManager,
#   3. DSPy::Registry::RegistryManager.
#
# @param result [OptimizationResult] the result to persist
sig { params(result: OptimizationResult).void }
def save_results(result)
  # Every persistence target is gated on the same switch
  return unless @config.save_intermediate_results

  teleprompter_name = self.class.name
  config_snapshot = @config.to_h

  # Legacy file-based saving. The JSON text is generated before the file is
  # opened, so a serialization failure cannot leave a truncated file behind.
  if @config.save_path
    File.write(@config.save_path, JSON.pretty_generate(result.to_h))
  end

  # Modern storage system integration
  DSPy::Storage::StorageManager.instance.save_optimization_result(
    result,
    tags: [teleprompter_name.split('::').last.downcase],
    description: "Optimization by #{teleprompter_name}",
    metadata: {
      teleprompter_class: teleprompter_name,
      config: config_snapshot,
      optimization_duration: result.metadata[:optimization_duration] || 0
    }
  )

  # Registry system integration for version management
  DSPy::Registry::RegistryManager.instance.register_optimization_result(
    result,
    metadata: {
      teleprompter_class: teleprompter_name,
      config: @config.to_h
    }
  )
end
|
258
|
+
|
259
|
+
protected
|
260
|
+
|
261
|
+
# Run the single-example check over a whole collection.
# A nil collection is accepted and skipped.
#
# @param examples [Array, nil] examples to check
# @param context [String] label used in error messages (e.g. "training")
# @raise [ArgumentError] on the first malformed example
sig { params(examples: T.nilable(T::Array[T.untyped]), context: String).void }
def validate_examples(examples, context)
  return unless examples

  examples.each.with_index do |candidate, position|
    validate_single_example(candidate, "#{context} example #{position}")
  end
end
|
270
|
+
|
271
|
+
# Check the shape of one example. Accepted forms:
#   * a DSPy::Example,
#   * a Hash holding both input and expected under symbol keys, or both under string keys,
#   * any other object responding to both #input and #expected.
#
# @param example the example to check
# @param context [String] description of the example used in the error message
# @raise [ArgumentError] when the example matches none of the accepted forms
sig { params(example: T.untyped, context: String).void }
def validate_single_example(example, context)
  well_formed =
    case example
    when DSPy::Example
      true
    when Hash
      # The two keys must come from the same family: both symbols or both strings
      [%i[input expected], %w[input expected]].any? do |required_keys|
        required_keys.all? { |key| example.key?(key) }
      end
    else
      # Duck-typed object exposing the expected readers
      example.respond_to?(:input) && example.respond_to?(:expected)
    end

  raise ArgumentError, "Invalid #{context}: must be DSPy::Example or structured hash with :input and :expected keys. Legacy flat format is no longer supported." unless well_formed
end
|
292
|
+
|
293
|
+
|
294
|
+
# Delegate signature inference to Utils.infer_signature_class.
#
# @param examples [Array] examples to inspect
# @return [Class, nil] whatever Utils.infer_signature_class returns for the examples
sig { params(examples: T::Array[T.untyped]).returns(T.nilable(T.class_of(Signature))) }
def infer_signature_class(examples)
  # Loaded on first use rather than at file load time
  require_relative 'utils'

  Utils.infer_signature_class(examples)
end
|
300
|
+
|
301
|
+
# Pick a fallback metric based on the first example.
#
# @param examples [Array] examples the metric would be applied to
# @return [Proc, nil] a proc calling example.matches_prediction?(prediction)
#   when the first example is a DSPy::Example; nil for any other format
sig { params(examples: T::Array[T.untyped]).returns(T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T::Boolean))) }
def default_metric_for_examples(examples)
  # Only DSPy::Example has built-in matching; other formats get no default
  return nil unless examples.first.is_a?(DSPy::Example)

  proc { |example, prediction| example.matches_prediction?(prediction) }
end
|
312
|
+
|
313
|
+
# Run a block under Instrumentation.instrument for an optimization step.
# The event is named "dspy.optimization.<step_name>"; the payload is extended
# with the teleprompter class name and the current config (these two keys
# win over same-named keys in +payload+).
#
# @param step_name [String] suffix of the event name
# @param payload [Hash] extra event data
# @return whatever Instrumentation.instrument returns
sig { params(step_name: String, payload: T::Hash[Symbol, T.untyped], block: T.proc.returns(T.untyped)).returns(T.untyped) }
def instrument_step(step_name, payload = {}, &block)
  enriched_payload = payload.merge(
    teleprompter_class: self.class.name,
    config: @config.to_h
  )

  Instrumentation.instrument("dspy.optimization.#{step_name}", enriched_payload, &block)
end
|
323
|
+
|
324
|
+
# Publish a one-off optimization event through Instrumentation.emit.
# The event is named "dspy.optimization.<event_name>"; the payload is extended
# with the teleprompter class name and an ISO 8601 timestamp of the current
# time (these two keys win over same-named keys in +payload+).
#
# @param event_name [String] suffix of the event name
# @param payload [Hash] extra event data
sig { params(event_name: String, payload: T::Hash[Symbol, T.untyped]).void }
def emit_event(event_name, payload = {})
  stamped_payload = payload.merge(
    teleprompter_class: self.class.name,
    timestamp: Time.now.iso8601
  )

  Instrumentation.emit("dspy.optimization.#{event_name}", stamped_payload)
end
|
334
|
+
end
|
335
|
+
end
|
336
|
+
end
|