dspy 0.28.0 → 0.28.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dspy/callbacks.rb +222 -0
- data/lib/dspy/chain_of_thought.rb +2 -1
- data/lib/dspy/lm/adapters/gemini/schema_converter.rb +25 -16
- data/lib/dspy/lm/json_strategy.rb +0 -5
- data/lib/dspy/lm.rb +38 -9
- data/lib/dspy/mixins/type_coercion.rb +7 -7
- data/lib/dspy/module.rb +33 -0
- data/lib/dspy/predict.rb +7 -0
- data/lib/dspy/prompt.rb +90 -20
- data/lib/dspy/propose/dataset_summary_generator.rb +177 -0
- data/lib/dspy/propose/grounded_proposer.rb +208 -61
- data/lib/dspy/structured_outputs_prompt.rb +53 -0
- data/lib/dspy/teleprompt/bootstrap_strategy.rb +26 -0
- data/lib/dspy/teleprompt/mipro_v2.rb +81 -56
- data/lib/dspy/teleprompt/simple_optimizer.rb +40 -34
- data/lib/dspy/teleprompt/utils.rb +343 -41
- data/lib/dspy/version.rb +1 -1
- data/lib/dspy.rb +1 -0
- metadata +20 -2
data/lib/dspy/prompt.rb
CHANGED
@@ -22,21 +22,39 @@ module DSPy
|
|
22
22
|
sig { returns(T.nilable(String)) }
|
23
23
|
attr_reader :signature_class_name
|
24
24
|
|
25
|
+
# Returns the effective schema format for rendering prompts.
# Precedence: explicitly set instance value (anything other than the
# :json default) > DSPy.config.lm's schema_format > :json fallback.
sig { returns(Symbol) }
def schema_format
  explicit = @schema_format
  # An instance-level value other than the :json default always wins.
  if explicit && explicit != :json
    explicit
  else
    # Otherwise defer to the globally configured LM, when one is set.
    DSPy.config.lm&.schema_format || explicit || :json
  end
end

# The signature class this prompt was built from, when available
# (used for BAML schema rendering).
sig { returns(T.nilable(T.class_of(Signature))) }
attr_reader :signature_class
|
38
|
+
|
25
39
|
# Builds an immutable prompt value object.
#
# @param instruction the task instruction text
# @param input_schema JSON schema hash describing the input fields
# @param output_schema JSON schema hash describing the output fields
# @param few_shot_examples optional demonstrations embedded in the prompt
# @param signature_class_name fully qualified signature name, if known
# @param schema_format :json (default) or :baml schema rendering
# @param signature_class originating signature class, used for BAML rendering
sig do
  params(
    instruction: String,
    input_schema: T::Hash[Symbol, T.untyped],
    output_schema: T::Hash[Symbol, T.untyped],
    few_shot_examples: T::Array[FewShotExample],
    signature_class_name: T.nilable(String),
    schema_format: Symbol,
    signature_class: T.nilable(T.class_of(Signature))
  ).void
end
def initialize(instruction:, input_schema:, output_schema:, few_shot_examples: [], signature_class_name: nil, schema_format: :json, signature_class: nil)
  @instruction = instruction
  @input_schema = input_schema.freeze
  @output_schema = output_schema.freeze
  # Schemas and examples are frozen so optimization passes must go
  # through the immutable with_* update methods instead of mutating.
  @few_shot_examples = few_shot_examples.freeze
  @signature_class_name = signature_class_name
  @schema_format = schema_format
  @signature_class = signature_class
end
|
41
59
|
|
42
60
|
# Immutable update methods for optimization
|
@@ -47,7 +65,9 @@ module DSPy
|
|
47
65
|
input_schema: @input_schema,
|
48
66
|
output_schema: @output_schema,
|
49
67
|
few_shot_examples: @few_shot_examples,
|
50
|
-
signature_class_name: @signature_class_name
|
68
|
+
signature_class_name: @signature_class_name,
|
69
|
+
schema_format: @schema_format,
|
70
|
+
signature_class: @signature_class
|
51
71
|
)
|
52
72
|
end
|
53
73
|
|
@@ -58,7 +78,9 @@ module DSPy
|
|
58
78
|
input_schema: @input_schema,
|
59
79
|
output_schema: @output_schema,
|
60
80
|
few_shot_examples: new_examples,
|
61
|
-
signature_class_name: @signature_class_name
|
81
|
+
signature_class_name: @signature_class_name,
|
82
|
+
schema_format: @schema_format,
|
83
|
+
signature_class: @signature_class
|
62
84
|
)
|
63
85
|
end
|
64
86
|
|
@@ -72,16 +94,29 @@ module DSPy
|
|
72
94
|
sig { returns(String) }
|
73
95
|
def render_system_prompt
|
74
96
|
sections = []
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
97
|
+
|
98
|
+
case schema_format
|
99
|
+
when :baml
|
100
|
+
sections << "Your input schema fields are:"
|
101
|
+
sections << "```baml"
|
102
|
+
sections << render_baml_schema(@input_schema, :input)
|
103
|
+
sections << "```"
|
104
|
+
|
105
|
+
sections << "Your output schema fields are:"
|
106
|
+
sections << "```baml"
|
107
|
+
sections << render_baml_schema(@output_schema, :output)
|
108
|
+
sections << "```"
|
109
|
+
else # :json (default)
|
110
|
+
sections << "Your input schema fields are:"
|
111
|
+
sections << "```json"
|
112
|
+
sections << JSON.pretty_generate(@input_schema)
|
113
|
+
sections << "```"
|
114
|
+
|
115
|
+
sections << "Your output schema fields are:"
|
116
|
+
sections << "```json"
|
117
|
+
sections << JSON.pretty_generate(@output_schema)
|
118
|
+
sections << "```"
|
119
|
+
end
|
85
120
|
|
86
121
|
sections << ""
|
87
122
|
sections << "All interactions will be structured in the following way, with the appropriate values filled in."
|
@@ -148,32 +183,36 @@ module DSPy
|
|
148
183
|
few_shot_examples: @few_shot_examples.map(&:to_h),
|
149
184
|
input_schema: @input_schema,
|
150
185
|
output_schema: @output_schema,
|
151
|
-
signature_class_name: @signature_class_name
|
186
|
+
signature_class_name: @signature_class_name,
|
187
|
+
schema_format: @schema_format
|
152
188
|
}
|
153
189
|
end
|
154
190
|
|
155
191
|
# Deserializes a Prompt from the hash produced by #to_h.
# NOTE: signature_class cannot round-trip through a plain hash, so the
# rebuilt prompt has no signature_class (it stays nil).
sig { params(hash: T::Hash[Symbol, T.untyped]).returns(Prompt) }
def self.from_h(hash)
  raw_examples = hash[:few_shot_examples] || []

  new(
    instruction: hash[:instruction] || "",
    input_schema: hash[:input_schema] || {},
    output_schema: hash[:output_schema] || {},
    few_shot_examples: raw_examples.map { |example| FewShotExample.from_h(example) },
    signature_class_name: hash[:signature_class_name],
    schema_format: hash[:schema_format] || :json
  )
end
|
167
204
|
|
168
205
|
# Create prompt from signature class, carrying over its description,
# JSON schemas and name. schema_format selects :json (default) or :baml
# rendering of the schemas in the system prompt.
sig { params(signature_class: T.class_of(Signature), schema_format: Symbol).returns(Prompt) }
def self.from_signature(signature_class, schema_format: :json)
  instruction_text = signature_class.description || "Complete this task."

  new(
    instruction: instruction_text,
    input_schema: signature_class.input_json_schema,
    output_schema: signature_class.output_json_schema,
    few_shot_examples: [],
    signature_class_name: signature_class.name,
    schema_format: schema_format,
    signature_class: signature_class
  )
end
|
179
218
|
|
@@ -221,6 +260,37 @@ module DSPy
|
|
221
260
|
|
222
261
|
private
|
223
262
|
|
263
|
+
# Render the BAML representation of the input or output schema.
# Prefers sorbet-baml's Struct#to_baml when both the gem and the
# signature's struct class are available; otherwise returns a comment
# explaining that the gem is required. The schema argument is reserved
# for a manual fallback renderer that is not implemented yet.
sig { params(schema: T::Hash[Symbol, T.untyped], type: Symbol).returns(String) }
def render_baml_schema(schema, type)
  if @signature_class
    begin
      require 'sorbet_baml'

      struct_class =
        if type == :input
          @signature_class.input_struct_class
        else
          @signature_class.output_struct_class
        end

      if struct_class
        # Anonymous struct classes render as "#<Class:0x...>" in BAML
        # output; swap in a readable name derived from the signature.
        base_name = @signature_class_name || @signature_class.name || "Schema"
        suffix = type == :input ? "Input" : "Output"
        baml_source = struct_class.to_baml
        return baml_source.sub(/^class #<Class:0x[0-9a-f]+>/, "class #{base_name}#{suffix}")
      end
    rescue LoadError
      # sorbet_baml is not installed; fall through to the placeholder below.
    end
  end

  # Fallback when sorbet-baml is unavailable or no struct class exists.
  "# BAML schema generation requires sorbet-baml gem\n" \
  "# Please install: gem install sorbet-baml"
end
|
293
|
+
|
224
294
|
# Recursively serialize complex objects for JSON representation
|
225
295
|
sig { params(obj: T.untyped).returns(T.untyped) }
|
226
296
|
def serialize_for_json(obj)
|
@@ -0,0 +1,177 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'sorbet-runtime'
|
4
|
+
require_relative '../signature'
|
5
|
+
require_relative '../predict'
|
6
|
+
|
7
|
+
module DSPy
|
8
|
+
module Propose
|
9
|
+
# Dataset Summary Generator for creating concise dataset descriptions
|
10
|
+
# Used by GroundedProposer for data-aware instruction generation
|
11
|
+
module DatasetSummaryGenerator
|
12
|
+
extend T::Sig
|
13
|
+
|
14
|
+
# Signature that condenses a set of accumulated dataset observations
# into a short 2-3 sentence summary, used downstream for data-aware
# instruction proposal.
class ObservationSummarizer < DSPy::Signature
  description "Given a series of observations I have made about my dataset, please summarize them into a brief 2-3 sentence summary which highlights only the most important details."

  input do
    const :observations, String, description: "Observations I have made about my dataset"
  end

  output do
    const :summary, String, description: "Two to Three sentence summary of only the most significant highlights of my observations"
  end
end
|
26
|
+
|
27
|
+
# Signature that produces first-pass observations about a dataset from
# a batch of raw examples. (Fixed typos in the LLM-facing prompt text:
# "conciceness" -> "conciseness", "Somethings that holds" -> "Something
# that holds".)
class DatasetDescriptor < DSPy::Signature
  description "Given several examples from a dataset please write observations about trends that hold for most or all of the samples. " \
              "Some areas you may consider in your observations: topics, content, syntax, conciseness, etc. " \
              "It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creative"

  input do
    const :examples, String, description: "Sample data points from the dataset"
  end

  output do
    const :observations, String, description: "Something that holds true for most or all of the data you observed"
  end
end
|
41
|
+
|
42
|
+
# Signature that refines previously gathered observations using a new
# batch of examples; the model may answer 'COMPLETE' (checked verbatim
# by create_dataset_summary) when nothing remains to add. (Fixed typos
# in the LLM-facing prompt text: "conciceness" -> "conciseness",
# "Somethings that holds" -> "Something that holds".)
class DatasetDescriptorWithPriorObservations < DSPy::Signature
  description "Given several examples from a dataset please write observations about trends that hold for most or all of the samples. " \
              "I will also provide you with a few observations I have already made. Please add your own observations or if you feel the observations are comprehensive say 'COMPLETE' " \
              "Some areas you may consider in your observations: topics, content, syntax, conciseness, etc. " \
              "It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creative"

  input do
    const :examples, String, description: "Sample data points from the dataset"
    const :prior_observations, String, description: "Some prior observations I made about the data"
  end

  output do
    const :observations, String, description: "Something that holds true for most or all of the data you observed or COMPLETE if you have nothing to add"
  end
end
|
58
|
+
|
59
|
+
# Normalizes the ordering of keys inside every "input_keys={...}" segment
# of a string representation, so example dumps are deterministic — this
# helps with caching and keeps LLM prompts stable across runs.
sig { params(unordered_repr: String).returns(String) }
def self.order_input_keys_in_string(unordered_repr)
  # Rebuild each occurrence with its keys sorted alphabetically.
  unordered_repr.gsub(/input_keys=\{([^}]+)\}/) do
    sorted_keys = Regexp.last_match(1).split(',').map(&:strip).sort
    "input_keys={#{sorted_keys.join(', ')}}"
  end
end
|
75
|
+
|
76
|
+
# Strip common label prefixes from LLM outputs (e.g. "Answer:",
# "Final Output:") — up to five words before a colon, optionally led by
# asterisks/whitespace — then trim surrounding whitespace and a single
# leading/trailing quote character.
sig { params(text: String).returns(String) }
def self.strip_prefix(text)
  without_label = text.gsub(/^[\*\s]*(([\w'\-]+\s+){0,4}[\w'\-]+):\s*/, '')
  without_label.strip.gsub(/^["']|["']$/, '')
end
|
84
|
+
|
85
|
+
# Generate a concise 2-3 sentence summary of a training dataset.
# Used for data-aware instruction proposal in MIPROv2.
#
# Observations are bootstrapped from the first batch of examples, then
# iteratively refined batch-by-batch (capped at max_calls LM calls).
# Refinement stops early once the LM answers "COMPLETE" enough times,
# and any error during refinement is swallowed so a partial set of
# observations can still be summarized.
#
# @param trainset [Array<DSPy::Example>] Training examples to summarize
# @param view_data_batch_size [Integer] Number of examples to process per batch (positional)
# @param prompt_model [DSPy::LM, nil] Language model to use (positional; defaults to DSPy.lm)
# @param verbose [Boolean] Whether to print progress information
# @return [String] 2-3 sentence summary of the dataset characteristics
#
# @example Basic usage
#   # NOTE: view_data_batch_size and prompt_model are positional arguments.
#   summary = DatasetSummaryGenerator.create_dataset_summary(
#     trainset,
#     10,                          # view_data_batch_size
#     DSPy::LM.new('gpt-4o-mini')  # prompt_model
#   )
#
sig do
  params(
    trainset: T::Array[DSPy::Example],
    view_data_batch_size: Integer,
    prompt_model: T.nilable(DSPy::LM),
    verbose: T::Boolean
  ).returns(String)
end
def self.create_dataset_summary(trainset, view_data_batch_size, prompt_model, verbose: false)
  if verbose
    puts "\nBootstrapping dataset summary (this will be used to generate instructions)..."
  end

  # Use provided model or fall back to global LM
  lm = prompt_model || DSPy.lm
  raise ArgumentError, "No language model configured. Set prompt_model or DSPy.lm" unless lm

  # Use provided LM in a block context
  DSPy.with_lm(lm) do
    # Initial observation from first batch
    upper_lim = [trainset.length, view_data_batch_size].min
    examples_repr = order_input_keys_in_string(trainset[0...upper_lim].inspect)

    predictor = DSPy::Predict.new(DatasetDescriptor)
    observation = predictor.call(examples: examples_repr)
    observations = observation.observations

    # Iteratively refine observations with additional batches.
    # `calls` caps total LM usage; `skips` counts "COMPLETE" replies
    # before giving up on further refinement.
    skips = 0
    max_calls = 10
    calls = 0

    begin
      (view_data_batch_size...trainset.length).step(view_data_batch_size) do |b|
        calls += 1
        break if calls >= max_calls

        puts "Processing batch starting at index #{b}" if verbose

        upper_lim = [trainset.length, b + view_data_batch_size].min
        examples_repr = order_input_keys_in_string(trainset[b...upper_lim].inspect)

        predictor = DSPy::Predict.new(DatasetDescriptorWithPriorObservations)
        output = predictor.call(
          prior_observations: observations,
          examples: examples_repr
        )

        # Check if LLM indicates observations are complete (case-insensitive
        # match on the first 8 characters, i.e. "COMPLETE").
        if output.observations.length >= 8 && output.observations[0...8].upcase == "COMPLETE"
          skips += 1
          break if skips >= 5
          next
        end

        observations += output.observations
      end
    rescue => e
      # Best effort: keep whatever observations were gathered so far.
      if verbose
        puts "Error during observation refinement: #{e.message}. Using observations from past round for summary."
      end
    end

    # Generate final summary from accumulated observations
    predictor = DSPy::Predict.new(ObservationSummarizer)
    summary = predictor.call(observations: observations)

    if verbose
      puts "\nGenerated summary: #{strip_prefix(summary.summary)}\n"
    end

    strip_prefix(summary.summary)
  end
end
|
175
|
+
end
|
176
|
+
end
|
177
|
+
end
|