dspy 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,442 @@
1
+ # frozen_string_literal: true
2
+
3
require 'sorbet-runtime'
require 'json'
require 'fileutils'
require 'digest'
require 'time'
7
+
8
+ module DSPy
9
+ module Storage
10
+ # Storage system for saving and loading optimized DSPy programs
11
+ # Handles serialization of optimization results, program state, and history tracking
12
+ class ProgramStorage
13
+ extend T::Sig
14
+
15
+ # Represents a saved program with metadata
16
+ class SavedProgram
17
+ extend T::Sig
18
+
19
+ sig { returns(T.untyped) }
20
+ attr_reader :program
21
+
22
+ sig { returns(T::Hash[Symbol, T.untyped]) }
23
+ attr_reader :optimization_result
24
+
25
+ sig { returns(T::Hash[Symbol, T.untyped]) }
26
+ attr_reader :metadata
27
+
28
+ sig { returns(String) }
29
+ attr_reader :program_id
30
+
31
+ sig { returns(Time) }
32
+ attr_reader :saved_at
33
+
34
+ sig do
35
+ params(
36
+ program: T.untyped,
37
+ optimization_result: T::Hash[Symbol, T.untyped],
38
+ metadata: T::Hash[Symbol, T.untyped],
39
+ program_id: T.nilable(String),
40
+ saved_at: T.nilable(Time)
41
+ ).void
42
+ end
43
# Builds a saved-program record.
#
# program             - the program object being stored (any type).
# optimization_result - Hash describing the optimization outcome.
# metadata            - caller-supplied Hash; dspy_version, ruby_version and
#                       saved_with are merged on top of it, so those three
#                       keys always reflect the current process.
# program_id          - explicit id, or nil to have one generated.
# saved_at            - explicit timestamp, or nil to use the current time.
def initialize(program:, optimization_result:, metadata: {}, program_id: nil, saved_at: nil)
  # DSPy::VERSION may not be resolvable; fall back to a placeholder string.
  library_version =
    begin
      DSPy::VERSION
    rescue
      "unknown"
    end

  environment_info = {
    dspy_version: library_version,
    ruby_version: RUBY_VERSION,
    saved_with: "DSPy::Storage::ProgramStorage"
  }

  @program = program
  @optimization_result = optimization_result
  @metadata = metadata.merge(environment_info)

  # The generated id reads @optimization_result and @metadata, so both must
  # already be assigned at this point.
  @program_id = program_id || generate_program_id
  @saved_at = saved_at || Time.now
end
60
+
61
+ sig { returns(T::Hash[Symbol, T.untyped]) }
62
# Converts this record into a plain Hash: the id, the save time as an
# ISO 8601 string, the serialized program, the optimization result and the
# metadata.
def to_h
  snapshot = {}
  snapshot[:program_id] = @program_id
  snapshot[:saved_at] = @saved_at.iso8601
  snapshot[:program_data] = serialize_program(@program)
  snapshot[:optimization_result] = @optimization_result
  snapshot[:metadata] = @metadata
  snapshot
end
71
+
72
+ sig { params(data: T::Hash[Symbol, T.untyped]).returns(SavedProgram) }
73
# Rebuilds a SavedProgram from a Hash in the shape produced by #to_h
# (symbol keys: :program_id, :saved_at, :program_data, :optimization_result,
# :metadata).
#
# Missing :metadata or :optimization_result default to an empty Hash. A
# missing :saved_at is passed on as nil, which lets the constructor fall back
# to the current time instead of failing inside Time.parse.
#
# NOTE: the constructor merges dspy_version / ruby_version / saved_with over
# the supplied metadata, so those three values are re-stamped on load.
def self.from_h(data)
  raw_saved_at = data[:saved_at]

  new(
    program: deserialize_program(data[:program_data]),
    optimization_result: data[:optimization_result] || {},
    metadata: data[:metadata] || {},
    program_id: data[:program_id],
    # Time.parse(nil) raises TypeError, so only parse when a value is present.
    saved_at: raw_saved_at ? Time.parse(raw_saved_at) : nil
  )
end
82
+
83
+ private
84
+
85
+ sig { returns(String) }
86
# Derives a 16-character hex id from the best score value, the metadata
# hash code and the current time, joined with underscores and run through
# SHA-256.
def generate_program_id
  seed_parts = [
    @optimization_result[:best_score_value],
    @metadata.hash,
    Time.now.to_f
  ]
  seed = seed_parts.map(&:to_s).join("_")
  Digest::SHA256.hexdigest(seed).slice(0, 16)
end
90
+
91
+ sig { params(program: T.untyped).returns(T::Hash[Symbol, T.untyped]) }
92
# Produces the basic serialized form of a program: the name of its class
# plus whatever state extract_program_state can read from it.
def serialize_program(program)
  program_class_name = program.class.name
  captured_state = extract_program_state(program)

  { class_name: program_class_name, state: captured_state }
end
99
+
100
+ sig { params(data: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
101
# Placeholder deserializer: no program object is reconstructed yet, so the
# serialized Hash is handed back unchanged.
def self.deserialize_program(data)
  data
end
106
+
107
+ sig { params(program: T.untyped).returns(T::Hash[Symbol, T.untyped]) }
108
# Collects the state a program exposes through duck-typed accessors.
#
# Each key is only present when the program responds to the matching method:
#   :signature_class   - name of program.signature_class (nil if that is nil)
#   :instruction       - program.prompt.instruction
#   :few_shot_examples - program.few_shot_examples
def extract_program_state(program)
  {}.tap do |snapshot|
    if program.respond_to?(:signature_class)
      snapshot[:signature_class] = program.signature_class&.name
    end

    has_instruction = program.respond_to?(:prompt) && program.prompt.respond_to?(:instruction)
    snapshot[:instruction] = program.prompt.instruction if has_instruction

    if program.respond_to?(:few_shot_examples)
      snapshot[:few_shot_examples] = program.few_shot_examples
    end
  end
end
126
+ end
127
+
128
+ sig { returns(String) }
129
+ attr_reader :storage_path
130
+
131
+ sig { returns(T::Boolean) }
132
+ attr_reader :create_directories
133
+
134
+ sig do
135
+ params(
136
+ storage_path: String,
137
+ create_directories: T::Boolean
138
+ ).void
139
+ end
140
# Configures the storage location.
#
# storage_path       - directory for stored programs; expanded to an
#                      absolute path.
# create_directories - when true, the storage directories are created
#                      immediately.
def initialize(storage_path: "./dspy_storage", create_directories: true)
  @create_directories = create_directories
  @storage_path = File.expand_path(storage_path)

  return unless @create_directories

  setup_storage_directory
end
146
+
147
+ # Save an optimized program with its optimization results
148
+ sig do
149
+ params(
150
+ program: T.untyped,
151
+ optimization_result: T.untyped,
152
+ program_id: T.nilable(String),
153
+ metadata: T::Hash[Symbol, T.untyped]
154
+ ).returns(SavedProgram)
155
+ end
156
# Persists a program together with its optimization result.
#
# Emits a save_start event, writes the record as pretty-printed JSON to the
# program's file, updates the history file and emits save_complete. Any
# StandardError raised after the start event is reported through a
# save_error event and then re-raised.
#
# Returns the SavedProgram that was written.
def save_program(program, optimization_result, program_id: nil, metadata: {})
  emit_save_start_event(program_id)

  begin
    # Objects exposing #to_h are flattened; anything else is used as given.
    result_hash =
      if optimization_result.respond_to?(:to_h)
        optimization_result.to_h
      else
        optimization_result
      end

    record = SavedProgram.new(
      program: program,
      optimization_result: result_hash,
      metadata: metadata,
      program_id: program_id
    )

    destination = program_file_path(record.program_id)
    File.write(destination, JSON.pretty_generate(record.to_h))

    update_history(record)
    emit_save_complete_event(record)

    record
  rescue => failure
    emit_save_error_event(program_id, failure)
    raise
  end
end
185
+
186
+ # Load a program by its ID
187
+ sig { params(program_id: String).returns(T.nilable(SavedProgram)) }
188
# Loads a stored program by id.
#
# Returns the SavedProgram, or nil when the file does not exist or when
# reading/parsing raises a StandardError; both failure cases emit a
# load_error event rather than raising.
def load_program(program_id)
  emit_load_start_event(program_id)

  begin
    source_file = program_file_path(program_id)

    if File.exist?(source_file)
      parsed = JSON.parse(File.read(source_file), symbolize_names: true)
      restored = SavedProgram.from_h(parsed)

      emit_load_complete_event(restored)
      restored
    else
      emit_load_error_event(program_id, "Program not found: #{program_id}")
      nil
    end
  rescue => failure
    emit_load_error_event(program_id, failure)
    nil
  end
end
210
+
211
+ # List all saved programs
212
+ sig { returns(T::Array[T::Hash[Symbol, T.untyped]]) }
213
# Returns the :programs entries recorded in history.json, or an empty array
# when the history file is missing or has no :programs key.
def list_programs
  history_file = File.join(@storage_path, "history.json")

  if File.exist?(history_file)
    recorded = JSON.parse(File.read(history_file), symbolize_names: true)
    recorded[:programs] || []
  else
    []
  end
end
220
+
221
+ # Get program history with performance metrics
222
+ sig { returns(T::Hash[Symbol, T.untyped]) }
223
# Returns the parsed contents of history.json with symbol keys, or an empty
# skeleton ({ programs: [], summary: {} }) when no history file exists.
def get_history
  history_file = File.join(@storage_path, "history.json")

  if File.exist?(history_file)
    JSON.parse(File.read(history_file), symbolize_names: true)
  else
    { programs: [], summary: {} }
  end
end
229
+
230
+ # Delete a saved program
231
+ sig { params(program_id: String).returns(T::Boolean) }
232
# Deletes a stored program.
#
# Returns false when no file exists for the id. Otherwise removes the file,
# drops the entry from the history, emits a delete event and returns true.
def delete_program(program_id)
  target = program_file_path(program_id)
  return false unless File.exist?(target)

  File.delete(target)
  remove_from_history(program_id)
  emit_delete_event(program_id)
  true
end
244
+
245
+ # Export multiple programs to a single file
246
+ sig { params(program_ids: T::Array[String], export_path: String).void }
247
# Writes the given programs into a single JSON export file.
#
# Ids that load_program cannot resolve (it returns nil) are skipped. The
# export contains the export time, the DSPy version (or "unknown") and each
# program's Hash form. An export event with the number of exported programs
# is emitted afterwards.
def export_programs(program_ids, export_path)
  loaded = program_ids.each_with_object([]) do |id, found|
    candidate = load_program(id)
    found << candidate unless candidate.nil?
  end

  # DSPy::VERSION may not be resolvable; fall back to a placeholder string.
  library_version =
    begin
      DSPy::VERSION
    rescue
      "unknown"
    end

  payload = {
    exported_at: Time.now.iso8601,
    dspy_version: library_version,
    programs: loaded.map { |entry| entry.to_h }
  }

  File.write(export_path, JSON.pretty_generate(payload))
  emit_export_event(export_path, loaded.size)
end
265
+
266
+ # Import programs from an exported file
267
+ sig { params(import_path: String).returns(T::Array[SavedProgram]) }
268
# Imports every program contained in an export file.
#
# import_path - path to a JSON file whose top level is an object with a
#               "programs" array (the shape written by export_programs).
#
# Each entry is rebuilt with SavedProgram.from_h, written to its program file
# and added to the history. The id and saved_at come from the imported data,
# so records are NOT re-timestamped on import.
#
# Returns the array of imported SavedProgram objects.
# Raises ArgumentError when the file has no "programs" array.
def import_programs(import_path)
  data = JSON.parse(File.read(import_path), symbolize_names: true)

  entries = data.is_a?(Hash) ? data[:programs] : nil
  unless entries.is_a?(Array)
    raise ArgumentError, "Invalid export file #{import_path}: expected a top-level \"programs\" array"
  end

  imported = entries.map do |program_data|
    saved_program = SavedProgram.from_h(program_data)

    file_path = program_file_path(saved_program.program_id)
    File.write(file_path, JSON.pretty_generate(saved_program.to_h))

    update_history(saved_program)
    saved_program
  end

  emit_import_event(import_path, imported.size)
  imported
end
286
+
287
+ private
288
+
289
+ sig { void }
290
# Ensures the storage root and its "programs" subdirectory exist, creating
# whichever of the two is missing.
def setup_storage_directory
  required_directories = [@storage_path, File.join(@storage_path, "programs")]

  required_directories.each do |directory|
    next if Dir.exist?(directory)

    FileUtils.mkdir_p(directory)
  end
end
297
+
298
+ sig { params(program_id: String).returns(String) }
299
# Returns the JSON file path for a program id inside the "programs"
# subdirectory of the storage root.
#
# Ids may come from callers or from imported files, so an id containing a
# path separator or a NUL byte is rejected: it could otherwise resolve to a
# file outside the programs directory.
#
# Raises ArgumentError for such ids.
def program_file_path(program_id)
  id_text = program_id.to_s
  if ["/", "\\", "\0"].any? { |fragment| id_text.include?(fragment) }
    raise ArgumentError, "Invalid program_id #{program_id.inspect}: must not contain path separators"
  end

  File.join(@storage_path, "programs", "#{program_id}.json")
end
302
+
303
+ sig { params(saved_program: SavedProgram).void }
304
# Records a saved program in history.json and rewrites the summary.
#
# Any existing entry with the same program_id is replaced, and the new entry
# is always appended last. The summary holds the entry count, the average of
# the non-nil best scores (0.0 when there are none) and the saved_at of the
# program just recorded.
#
# A history file that exists but lacks a :programs list is treated as having
# no entries instead of raising on nil.
def update_history(saved_program)
  history_path = File.join(@storage_path, "history.json")

  history =
    if File.exist?(history_path)
      JSON.parse(File.read(history_path), symbolize_names: true)
    else
      {}
    end
  history[:programs] ||= []

  result = saved_program.optimization_result
  program_entry = {
    program_id: saved_program.program_id,
    saved_at: saved_program.saved_at.iso8601,
    best_score: result[:best_score_value],
    score_name: result[:best_score_name],
    optimizer: result[:metadata]&.dig(:optimizer),
    signature_class: saved_program.metadata[:signature_class],
    metadata: saved_program.metadata
  }

  # Drop a previous entry for the same id so that saving acts as an update.
  history[:programs].reject! { |entry| entry[:program_id] == saved_program.program_id }
  history[:programs] << program_entry

  scores = history[:programs].map { |entry| entry[:best_score] }.compact
  history[:summary] = {
    total_programs: history[:programs].size,
    avg_score: scores.empty? ? 0.0 : scores.sum.to_f / scores.size,
    latest_save: saved_program.saved_at.iso8601
  }

  File.write(history_path, JSON.pretty_generate(history))
end
338
+
339
+ sig { params(program_id: String).void }
340
# Removes a program's entry from history.json and rewrites the summary.
#
# Does nothing when the history file does not exist. A file without a
# :programs list is treated as having no entries.
#
# update_history always appends the entry it just wrote and reports that
# entry's saved_at as :latest_save. To keep the summary shape consistent
# after a deletion, :latest_save is set to the saved_at of the last remaining
# entry, and is left out when no entries remain.
def remove_from_history(program_id)
  history_path = File.join(@storage_path, "history.json")
  return unless File.exist?(history_path)

  history = JSON.parse(File.read(history_path), symbolize_names: true)
  remaining = (history[:programs] || []).reject { |entry| entry[:program_id] == program_id }
  history[:programs] = remaining

  scores = remaining.map { |entry| entry[:best_score] }.compact
  summary = {
    total_programs: remaining.size,
    avg_score: scores.empty? ? 0.0 : scores.sum.to_f / scores.size
  }

  most_recent = remaining.last
  summary[:latest_save] = most_recent[:saved_at] if most_recent
  history[:summary] = summary

  File.write(history_path, JSON.pretty_generate(history))
end
356
+
357
+ # Event emission methods
358
+ sig { params(program_id: T.nilable(String)).void }
359
# Emits 'dspy.storage.save_start' with the (possibly nil) program id, the
# storage path and the current time as an ISO 8601 string.
def emit_save_start_event(program_id)
  payload = {
    program_id: program_id,
    storage_path: @storage_path,
    timestamp: Time.now.iso8601
  }

  DSPy::Instrumentation.emit('dspy.storage.save_start', payload)
end
366
+
367
+ sig { params(saved_program: SavedProgram).void }
368
# Emits 'dspy.storage.save_complete' with the program id, its best score
# value, the size in bytes of the file written for it and the current time.
def emit_save_complete_event(saved_program)
  identifier = saved_program.program_id
  payload = {
    program_id: identifier,
    best_score: saved_program.optimization_result[:best_score_value],
    file_size_bytes: File.size(program_file_path(saved_program.program_id)),
    timestamp: Time.now.iso8601
  }

  DSPy::Instrumentation.emit('dspy.storage.save_complete', payload)
end
376
+
377
+ sig { params(program_id: T.nilable(String), error: Exception).void }
378
# Emits 'dspy.storage.save_error' with the (possibly nil) program id, the
# error's message and class name, and the current time.
def emit_save_error_event(program_id, error)
  payload = {
    program_id: program_id,
    error: error.message,
    error_class: error.class.name,
    timestamp: Time.now.iso8601
  }

  DSPy::Instrumentation.emit('dspy.storage.save_error', payload)
end
386
+
387
+ sig { params(program_id: String).void }
388
# Emits 'dspy.storage.load_start' with the program id and the current time.
def emit_load_start_event(program_id)
  payload = { program_id: program_id, timestamp: Time.now.iso8601 }

  DSPy::Instrumentation.emit('dspy.storage.load_start', payload)
end
394
+
395
+ sig { params(saved_program: SavedProgram).void }
396
# Emits 'dspy.storage.load_complete' with the program id, its original save
# time, its age in hours (rounded to two decimals) and the current time.
def emit_load_complete_event(saved_program)
  stored_at = saved_program.saved_at
  payload = {
    program_id: saved_program.program_id,
    saved_at: stored_at.iso8601,
    # Time subtraction yields seconds; 3600 converts to hours.
    age_hours: ((Time.now - stored_at) / 3600).round(2),
    timestamp: Time.now.iso8601
  }

  DSPy::Instrumentation.emit('dspy.storage.load_complete', payload)
end
404
+
405
+ sig { params(program_id: String, error: T.any(String, Exception)).void }
406
# Emits 'dspy.storage.load_error' with the program id, an error message and
# the current time. `error` may be an Exception (its message is used) or any
# other object (its to_s is used).
def emit_load_error_event(program_id, error)
  description =
    case error
    when Exception then error.message
    else error.to_s
    end

  payload = {
    program_id: program_id,
    error: description,
    timestamp: Time.now.iso8601
  }

  DSPy::Instrumentation.emit('dspy.storage.load_error', payload)
end
414
+
415
+ sig { params(program_id: String).void }
416
# Emits 'dspy.storage.delete' with the program id and the current time.
def emit_delete_event(program_id)
  payload = { program_id: program_id, timestamp: Time.now.iso8601 }

  DSPy::Instrumentation.emit('dspy.storage.delete', payload)
end
422
+
423
+ sig { params(export_path: String, program_count: Integer).void }
424
# Emits 'dspy.storage.export' with the export file path, the number of
# exported programs and the current time.
def emit_export_event(export_path, program_count)
  payload = {
    export_path: export_path,
    program_count: program_count,
    timestamp: Time.now.iso8601
  }

  DSPy::Instrumentation.emit('dspy.storage.export', payload)
end
431
+
432
+ sig { params(import_path: String, program_count: Integer).void }
433
# Emits 'dspy.storage.import' with the import file path, the number of
# imported programs and the current time.
def emit_import_event(import_path, program_count)
  payload = {
    import_path: import_path,
    program_count: program_count,
    timestamp: Time.now.iso8601
  }

  DSPy::Instrumentation.emit('dspy.storage.import', payload)
end
440
+ end
441
+ end
442
+ end