dspy 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +69 -382
- data/lib/dspy/chain_of_thought.rb +57 -0
- data/lib/dspy/evaluate.rb +554 -0
- data/lib/dspy/example.rb +203 -0
- data/lib/dspy/few_shot_example.rb +81 -0
- data/lib/dspy/instrumentation.rb +97 -8
- data/lib/dspy/lm/adapter_factory.rb +6 -8
- data/lib/dspy/lm.rb +5 -7
- data/lib/dspy/predict.rb +32 -34
- data/lib/dspy/prompt.rb +222 -0
- data/lib/dspy/propose/grounded_proposer.rb +560 -0
- data/lib/dspy/registry/registry_manager.rb +504 -0
- data/lib/dspy/registry/signature_registry.rb +725 -0
- data/lib/dspy/storage/program_storage.rb +442 -0
- data/lib/dspy/storage/storage_manager.rb +331 -0
- data/lib/dspy/subscribers/langfuse_subscriber.rb +669 -0
- data/lib/dspy/subscribers/logger_subscriber.rb +120 -0
- data/lib/dspy/subscribers/newrelic_subscriber.rb +686 -0
- data/lib/dspy/subscribers/otel_subscriber.rb +538 -0
- data/lib/dspy/teleprompt/data_handler.rb +107 -0
- data/lib/dspy/teleprompt/mipro_v2.rb +790 -0
- data/lib/dspy/teleprompt/simple_optimizer.rb +497 -0
- data/lib/dspy/teleprompt/teleprompter.rb +336 -0
- data/lib/dspy/teleprompt/utils.rb +380 -0
- data/lib/dspy/version.rb +5 -0
- data/lib/dspy.rb +16 -0
- metadata +29 -12
- data/lib/dspy/lm/adapters/ruby_llm_adapter.rb +0 -81
@@ -0,0 +1,442 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'sorbet-runtime'
require 'json'
require 'fileutils'
require 'digest'
require 'time'
|
7
|
+
|
8
|
+
module DSPy
|
9
|
+
module Storage
|
10
|
+
# Storage system for saving and loading optimized DSPy programs
|
11
|
+
# Handles serialization of optimization results, program state, and history tracking
|
12
|
+
class ProgramStorage
|
13
|
+
extend T::Sig
|
14
|
+
|
15
|
+
# Represents a saved program with metadata
|
16
|
+
class SavedProgram
|
17
|
+
extend T::Sig
|
18
|
+
|
19
|
+
sig { returns(T.untyped) }
|
20
|
+
attr_reader :program
|
21
|
+
|
22
|
+
sig { returns(T::Hash[Symbol, T.untyped]) }
|
23
|
+
attr_reader :optimization_result
|
24
|
+
|
25
|
+
sig { returns(T::Hash[Symbol, T.untyped]) }
|
26
|
+
attr_reader :metadata
|
27
|
+
|
28
|
+
sig { returns(String) }
|
29
|
+
attr_reader :program_id
|
30
|
+
|
31
|
+
sig { returns(Time) }
|
32
|
+
attr_reader :saved_at
|
33
|
+
|
34
|
+
sig do
|
35
|
+
params(
|
36
|
+
program: T.untyped,
|
37
|
+
optimization_result: T::Hash[Symbol, T.untyped],
|
38
|
+
metadata: T::Hash[Symbol, T.untyped],
|
39
|
+
program_id: T.nilable(String),
|
40
|
+
saved_at: T.nilable(Time)
|
41
|
+
).void
|
42
|
+
end
|
43
|
+
# Builds a saved-program record.
#
# program             - the program object (or its already-serialized form)
# optimization_result - hash describing the optimization outcome
# metadata            - caller-supplied metadata; the environment keys below
#                       are merged on top and win on conflict
# program_id          - explicit ID; one is generated when nil
# saved_at            - explicit timestamp; defaults to the current time
def initialize(program:, optimization_result:, metadata: {}, program_id: nil, saved_at: nil)
  @program = program
  @optimization_result = optimization_result

  # Fall back to "unknown" when the version constant cannot be resolved.
  library_version =
    begin
      DSPy::VERSION
    rescue StandardError
      "unknown"
    end

  environment_info = {
    dspy_version: library_version,
    ruby_version: RUBY_VERSION,
    saved_with: "DSPy::Storage::ProgramStorage"
  }
  @metadata = metadata.merge(environment_info)

  # generate_program_id reads @optimization_result and @metadata, so both
  # must be assigned before this point.
  @program_id = program_id
  @program_id ||= generate_program_id

  @saved_at = saved_at
  @saved_at ||= Time.now
end
|
60
|
+
|
61
|
+
sig { returns(T::Hash[Symbol, T.untyped]) }
|
62
|
+
# Converts this record into a plain hash: the ID, the save time as an
# ISO 8601 string, the serialized program, the optimization result and the
# metadata.
def to_h
  record = {}
  record[:program_id] = @program_id
  record[:saved_at] = @saved_at.iso8601
  record[:program_data] = serialize_program(@program)
  record[:optimization_result] = @optimization_result
  record[:metadata] = @metadata
  record
end
|
71
|
+
|
72
|
+
sig { params(data: T::Hash[Symbol, T.untyped]).returns(SavedProgram) }
|
73
|
+
# Rebuilds a SavedProgram from a hash with symbol keys (the shape produced
# by #to_h and read back via JSON.parse(..., symbolize_names: true)).
#
# data - hash with :program_data, :optimization_result, :metadata,
#        :program_id and :saved_at entries
#
# Missing entries are tolerated where a sensible default exists:
# - :optimization_result and :metadata default to an empty hash
# - :saved_at may be absent, in which case the constructor's default applies
#   instead of Time.parse(nil) raising a TypeError
#
# Time.parse comes from the 'time' standard library.
def self.from_h(data)
  raw_saved_at = data[:saved_at]

  new(
    program: deserialize_program(data[:program_data]),
    optimization_result: data[:optimization_result] || {},
    metadata: data[:metadata] || {},
    program_id: data[:program_id],
    saved_at: raw_saved_at ? Time.parse(raw_saved_at) : nil
  )
end
|
82
|
+
|
83
|
+
private
|
84
|
+
|
85
|
+
sig { returns(String) }
|
86
|
+
# Derives a 16-character hexadecimal ID from the best score, the metadata
# hash code and the current time.
def generate_program_id
  seed_parts = [
    @optimization_result[:best_score_value],
    @metadata.hash,
    Time.now.to_f
  ]
  seed = seed_parts.map(&:to_s).join("_")
  Digest::SHA256.hexdigest(seed).slice(0, 16)
end
|
90
|
+
|
91
|
+
sig { params(program: T.untyped).returns(T::Hash[Symbol, T.untyped]) }
|
92
|
+
# Serializes a program into a hash with :class_name and :state.
#
# program - a live program object, or a hash that is already in serialized
#           form (which is what a record built by from_h holds, because
#           deserialization currently returns the stored hash unchanged)
#
# An already-serialized hash is returned as-is. Re-serializing it would
# record class_name "Hash" with an empty state, silently discarding the
# program whenever a loaded record is written again (export / import).
def serialize_program(program)
  already_serialized = program.is_a?(Hash) && program.key?(:class_name) && program.key?(:state)
  return program if already_serialized

  # Basic serialization - can be enhanced for specific program types
  {
    class_name: program.class.name,
    state: extract_program_state(program)
  }
end
|
99
|
+
|
100
|
+
sig { params(data: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
|
101
|
+
# Counterpart of serialize_program. No program object is reconstructed yet:
# the serialized hash is handed back unchanged, so callers receive the
# stored data itself.
def self.deserialize_program(data)
  data
end
|
106
|
+
|
107
|
+
sig { params(program: T.untyped).returns(T::Hash[Symbol, T.untyped]) }
|
108
|
+
# Collects the properties a program exposes into a state hash.
#
# Each entry is recorded only if the program responds to the matching reader:
# - :signature_class   -> name of program.signature_class (nil if it is nil)
# - :instruction       -> program.prompt.instruction
# - :few_shot_examples -> program.few_shot_examples, stored as returned
def extract_program_state(program)
  {}.tap do |snapshot|
    snapshot[:signature_class] = program.signature_class&.name if program.respond_to?(:signature_class)

    has_instruction = program.respond_to?(:prompt) && program.prompt.respond_to?(:instruction)
    snapshot[:instruction] = program.prompt.instruction if has_instruction

    snapshot[:few_shot_examples] = program.few_shot_examples if program.respond_to?(:few_shot_examples)
  end
end
|
126
|
+
end
|
127
|
+
|
128
|
+
sig { returns(String) }
|
129
|
+
attr_reader :storage_path
|
130
|
+
|
131
|
+
sig { returns(T::Boolean) }
|
132
|
+
attr_reader :create_directories
|
133
|
+
|
134
|
+
sig do
|
135
|
+
params(
|
136
|
+
storage_path: String,
|
137
|
+
create_directories: T::Boolean
|
138
|
+
).void
|
139
|
+
end
|
140
|
+
# Creates a storage rooted at storage_path (expanded to an absolute path).
#
# storage_path       - directory that holds the storage files
# create_directories - when true, the directory layout is created right away
def initialize(storage_path: "./dspy_storage", create_directories: true)
  @storage_path = File.expand_path(storage_path)
  @create_directories = create_directories
  return unless @create_directories

  setup_storage_directory
end
|
146
|
+
|
147
|
+
# Save an optimized program with its optimization results
|
148
|
+
sig do
|
149
|
+
params(
|
150
|
+
program: T.untyped,
|
151
|
+
optimization_result: T.untyped,
|
152
|
+
program_id: T.nilable(String),
|
153
|
+
metadata: T::Hash[Symbol, T.untyped]
|
154
|
+
).returns(SavedProgram)
|
155
|
+
end
|
156
|
+
# Persists a program together with its optimization result.
#
# program             - the program to store
# optimization_result - result object; converted with to_h when it supports it
# program_id          - optional explicit ID (one is generated when nil)
# metadata            - extra metadata stored alongside the program
#
# Emits a save_start event, writes the program file, updates the history and
# emits save_complete. Any error raised after the start event is reported
# through a save_error event and then re-raised. Returns the SavedProgram.
def save_program(program, optimization_result, program_id: nil, metadata: {})
  emit_save_start_event(program_id)

  begin
    result_hash =
      if optimization_result.respond_to?(:to_h)
        optimization_result.to_h
      else
        optimization_result
      end

    record = SavedProgram.new(
      program: program,
      optimization_result: result_hash,
      metadata: metadata,
      program_id: program_id
    )

    File.write(program_file_path(record.program_id), JSON.pretty_generate(record.to_h))
    update_history(record)

    emit_save_complete_event(record)
    record
  rescue => failure
    emit_save_error_event(program_id, failure)
    raise
  end
end
|
185
|
+
|
186
|
+
# Load a program by its ID
|
187
|
+
sig { params(program_id: String).returns(T.nilable(SavedProgram)) }
|
188
|
+
# Loads a saved program by ID.
#
# Returns the SavedProgram, or nil when the program file does not exist or
# when reading/parsing fails. Both failure paths emit a load_error event;
# errors are not propagated to the caller.
def load_program(program_id)
  emit_load_start_event(program_id)

  begin
    path = program_file_path(program_id)

    if File.exist?(path)
      parsed = JSON.parse(File.read(path), symbolize_names: true)
      record = SavedProgram.from_h(parsed)

      emit_load_complete_event(record)
      record
    else
      emit_load_error_event(program_id, "Program not found: #{program_id}")
      nil
    end
  rescue => failure
    emit_load_error_event(program_id, failure)
    nil
  end
end
|
210
|
+
|
211
|
+
# List all saved programs
|
212
|
+
sig { returns(T::Array[T::Hash[Symbol, T.untyped]]) }
|
213
|
+
# Returns the program entries recorded in history.json, or an empty array
# when the history file does not exist or has no :programs entry.
def list_programs
  history_file = File.join(@storage_path, "history.json")

  if File.exist?(history_file)
    parsed = JSON.parse(File.read(history_file), symbolize_names: true)
    parsed[:programs] || []
  else
    []
  end
end
|
220
|
+
|
221
|
+
# Get program history with performance metrics
|
222
|
+
sig { returns(T::Hash[Symbol, T.untyped]) }
|
223
|
+
# Returns the parsed contents of history.json with symbol keys, or an empty
# history ({ programs: [], summary: {} }) when the file does not exist.
def get_history
  history_file = File.join(@storage_path, "history.json")

  if File.exist?(history_file)
    JSON.parse(File.read(history_file), symbolize_names: true)
  else
    { programs: [], summary: {} }
  end
end
|
229
|
+
|
230
|
+
# Delete a saved program
|
231
|
+
sig { params(program_id: String).returns(T::Boolean) }
|
232
|
+
# Deletes the stored file of the given program.
#
# Returns false when no file exists for the ID. Otherwise removes the file,
# drops the program from the history, emits a delete event and returns true.
def delete_program(program_id)
  target = program_file_path(program_id)
  return false unless File.exist?(target)

  File.delete(target)
  remove_from_history(program_id)
  emit_delete_event(program_id)
  true
end
|
244
|
+
|
245
|
+
# Export multiple programs to a single file
|
246
|
+
sig { params(program_ids: T::Array[String], export_path: String).void }
|
247
|
+
# Writes the given programs into a single JSON file at export_path.
#
# program_ids - IDs to export; IDs for which load_program returns nil are
#               skipped
# export_path - destination file
#
# The file contains the export time, the library version ("unknown" when the
# version constant cannot be resolved) and the hash form of every loaded
# program. An export event with the number of exported programs is emitted.
def export_programs(program_ids, export_path)
  loaded = []
  program_ids.each do |id|
    record = load_program(id)
    loaded << record unless record.nil?
  end

  library_version =
    begin
      DSPy::VERSION
    rescue StandardError
      "unknown"
    end

  bundle = {}
  bundle[:exported_at] = Time.now.iso8601
  bundle[:dspy_version] = library_version
  bundle[:programs] = loaded.map { |record| record.to_h }

  File.write(export_path, JSON.pretty_generate(bundle))
  emit_export_event(export_path, loaded.size)
end
|
265
|
+
|
266
|
+
# Import programs from an exported file
|
267
|
+
sig { params(import_path: String).returns(T::Array[SavedProgram]) }
|
268
|
+
# Imports programs from a file written by export_programs.
#
# import_path - path of the export file
#
# Every entry is rebuilt with SavedProgram.from_h, written to its own program
# file and recorded in the history. The original program ID and the original
# saved_at timestamp are both preserved. Emits an import event and returns
# the imported SavedProgram records.
#
# Raises ArgumentError when the file has no top-level "programs" array, so a
# malformed file fails with a clear message rather than a NoMethodError on nil.
def import_programs(import_path)
  data = JSON.parse(File.read(import_path), symbolize_names: true)

  program_entries = data.is_a?(Hash) ? data[:programs] : nil
  unless program_entries.is_a?(Array)
    raise ArgumentError, "Invalid export file #{import_path}: expected a top-level \"programs\" array"
  end

  imported = program_entries.map do |program_data|
    saved_program = SavedProgram.from_h(program_data)

    file_path = program_file_path(saved_program.program_id)
    File.write(file_path, JSON.pretty_generate(saved_program.to_h))

    update_history(saved_program)
    saved_program
  end

  emit_import_event(import_path, imported.size)
  imported
end
|
286
|
+
|
287
|
+
private
|
288
|
+
|
289
|
+
sig { void }
|
290
|
+
# Creates the storage directory and its "programs" subdirectory.
#
# FileUtils.mkdir_p creates missing parent directories and is a no-op for
# directories that already exist, so one call covers both levels and no
# separate existence check (with its check-then-create race) is needed.
def setup_storage_directory
  FileUtils.mkdir_p(File.join(@storage_path, "programs"))
end
|
297
|
+
|
298
|
+
sig { params(program_id: String).returns(String) }
|
299
|
+
# Returns the path of the JSON file that stores the given program:
# <storage_path>/programs/<program_id>.json
#
# program_id - ID of the program; must not contain a path separator
#
# IDs can come from outside this process (for example from an imported
# export file), so an ID containing a path separator is rejected. Otherwise
# an ID such as "../x" would resolve to a file outside the programs
# directory.
#
# Raises ArgumentError for an ID that contains a path separator.
def program_file_path(program_id)
  id = program_id.to_s
  separators = [File::SEPARATOR, File::ALT_SEPARATOR].compact
  if separators.any? { |separator| id.include?(separator) }
    raise ArgumentError, "Invalid program_id (contains a path separator): #{program_id.inspect}"
  end

  File.join(@storage_path, "programs", "#{id}.json")
end
|
302
|
+
|
303
|
+
sig { params(saved_program: SavedProgram).void }
|
304
|
+
# Records a saved program in history.json and refreshes the summary.
#
# saved_program - the record to add; an existing entry with the same
#                 program ID is replaced
#
# The summary holds the number of programs, the average of all non-nil best
# scores (0.0 when there are none) and the saved_at time of this record.
#
# A history file without a :programs entry is treated as having no programs,
# matching what list_programs does, instead of failing on nil.
def update_history(saved_program)
  history_path = File.join(@storage_path, "history.json")

  history =
    if File.exist?(history_path)
      JSON.parse(File.read(history_path), symbolize_names: true)
    else
      { programs: [], summary: { total_programs: 0, avg_score: 0.0 } }
    end
  history[:programs] ||= []

  result = saved_program.optimization_result
  program_entry = {
    program_id: saved_program.program_id,
    saved_at: saved_program.saved_at.iso8601,
    best_score: result[:best_score_value],
    score_name: result[:best_score_name],
    optimizer: result[:metadata]&.dig(:optimizer),
    signature_class: saved_program.metadata[:signature_class],
    metadata: saved_program.metadata
  }

  # Replace any previous entry for the same program.
  history[:programs].reject! { |entry| entry[:program_id] == saved_program.program_id }
  history[:programs] << program_entry

  scores = history[:programs].map { |entry| entry[:best_score] }.compact
  history[:summary] = {
    total_programs: history[:programs].size,
    avg_score: scores.empty? ? 0.0 : scores.sum.to_f / scores.size,
    latest_save: saved_program.saved_at.iso8601
  }

  File.write(history_path, JSON.pretty_generate(history))
end
|
338
|
+
|
339
|
+
sig { params(program_id: String).void }
|
340
|
+
# Removes a program from history.json and recalculates the summary.
#
# program_id - ID of the program to drop
#
# Does nothing when the history file does not exist. The program count and
# average score are recomputed from the remaining entries. Other summary
# fields (such as latest_save written by update_history) are kept, so the
# summary keeps the same shape after a delete. A history file without a
# :programs entry is treated as having no programs.
def remove_from_history(program_id)
  history_path = File.join(@storage_path, "history.json")
  return unless File.exist?(history_path)

  history = JSON.parse(File.read(history_path), symbolize_names: true)
  remaining = (history[:programs] || []).reject { |entry| entry[:program_id] == program_id }
  history[:programs] = remaining

  scores = remaining.map { |entry| entry[:best_score] }.compact
  history[:summary] = (history[:summary] || {}).merge(
    total_programs: remaining.size,
    avg_score: scores.empty? ? 0.0 : scores.sum.to_f / scores.size
  )

  File.write(history_path, JSON.pretty_generate(history))
end
|
356
|
+
|
357
|
+
# Event emission methods
|
358
|
+
sig { params(program_id: T.nilable(String)).void }
|
359
|
+
# Emits 'dspy.storage.save_start' with the (possibly nil) program ID, the
# storage path and the current time.
def emit_save_start_event(program_id)
  payload = {
    program_id: program_id,
    storage_path: @storage_path,
    timestamp: Time.now.iso8601
  }
  DSPy::Instrumentation.emit('dspy.storage.save_start', payload)
end
|
366
|
+
|
367
|
+
sig { params(saved_program: SavedProgram).void }
|
368
|
+
# Emits 'dspy.storage.save_complete' with the program ID, its best score,
# the size in bytes of the written program file and the current time.
def emit_save_complete_event(saved_program)
  payload = {
    program_id: saved_program.program_id,
    best_score: saved_program.optimization_result[:best_score_value],
    file_size_bytes: File.size(program_file_path(saved_program.program_id)),
    timestamp: Time.now.iso8601
  }
  DSPy::Instrumentation.emit('dspy.storage.save_complete', payload)
end
|
376
|
+
|
377
|
+
sig { params(program_id: T.nilable(String), error: Exception).void }
|
378
|
+
# Emits 'dspy.storage.save_error' with the (possibly nil) program ID, the
# error's message and class name, and the current time.
def emit_save_error_event(program_id, error)
  payload = {
    program_id: program_id,
    error: error.message,
    error_class: error.class.name,
    timestamp: Time.now.iso8601
  }
  DSPy::Instrumentation.emit('dspy.storage.save_error', payload)
end
|
386
|
+
|
387
|
+
sig { params(program_id: String).void }
|
388
|
+
# Emits 'dspy.storage.load_start' with the program ID and the current time.
def emit_load_start_event(program_id)
  payload = {
    program_id: program_id,
    timestamp: Time.now.iso8601
  }
  DSPy::Instrumentation.emit('dspy.storage.load_start', payload)
end
|
394
|
+
|
395
|
+
sig { params(saved_program: SavedProgram).void }
|
396
|
+
# Emits 'dspy.storage.load_complete' with the program ID, its save time, its
# age in hours (rounded to two decimals) and the current time.
def emit_load_complete_event(saved_program)
  payload = {
    program_id: saved_program.program_id,
    saved_at: saved_program.saved_at.iso8601,
    age_hours: ((Time.now - saved_program.saved_at) / 3600).round(2),
    timestamp: Time.now.iso8601
  }
  DSPy::Instrumentation.emit('dspy.storage.load_complete', payload)
end
|
404
|
+
|
405
|
+
sig { params(program_id: String, error: T.any(String, Exception)).void }
|
406
|
+
# Emits 'dspy.storage.load_error' with the program ID, an error description
# and the current time.
#
# error - an Exception (its message is reported) or any other value
#         (reported via to_s)
def emit_load_error_event(program_id, error)
  description =
    case error
    when Exception then error.message
    else error.to_s
    end

  payload = {
    program_id: program_id,
    error: description,
    timestamp: Time.now.iso8601
  }
  DSPy::Instrumentation.emit('dspy.storage.load_error', payload)
end
|
414
|
+
|
415
|
+
sig { params(program_id: String).void }
|
416
|
+
# Emits 'dspy.storage.delete' with the program ID and the current time.
def emit_delete_event(program_id)
  payload = {
    program_id: program_id,
    timestamp: Time.now.iso8601
  }
  DSPy::Instrumentation.emit('dspy.storage.delete', payload)
end
|
422
|
+
|
423
|
+
sig { params(export_path: String, program_count: Integer).void }
|
424
|
+
# Emits 'dspy.storage.export' with the export file path, the number of
# exported programs and the current time.
def emit_export_event(export_path, program_count)
  payload = {
    export_path: export_path,
    program_count: program_count,
    timestamp: Time.now.iso8601
  }
  DSPy::Instrumentation.emit('dspy.storage.export', payload)
end
|
431
|
+
|
432
|
+
sig { params(import_path: String, program_count: Integer).void }
|
433
|
+
# Emits 'dspy.storage.import' with the import file path, the number of
# imported programs and the current time.
def emit_import_event(import_path, program_count)
  payload = {
    import_path: import_path,
    program_count: program_count,
    timestamp: Time.now.iso8601
  }
  DSPy::Instrumentation.emit('dspy.storage.import', payload)
end
|
440
|
+
end
|
441
|
+
end
|
442
|
+
end
|