dspy 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,331 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sorbet-runtime'
4
+ require_relative 'program_storage'
5
+
6
+ module DSPy
7
+ module Storage
8
+ # High-level storage manager that integrates with the teleprompter system
9
+ # Provides easy saving/loading of optimization results
10
+ class StorageManager
11
+ extend T::Sig
12
+
13
+ # Configuration for storage behavior
14
+ class StorageConfig
15
+ extend T::Sig
16
+
17
+ sig { returns(String) }
18
+ attr_accessor :storage_path
19
+
20
+ sig { returns(T::Boolean) }
21
+ attr_accessor :auto_save
22
+
23
+ sig { returns(T::Boolean) }
24
+ attr_accessor :save_intermediate_results
25
+
26
+ sig { returns(Integer) }
27
+ attr_accessor :max_stored_programs
28
+
29
+ sig { returns(T::Boolean) }
30
+ attr_accessor :compress_old_programs
31
+
32
+ sig { void }
33
# Build a configuration populated with the default value of every setting.
# Each setting can be changed afterwards through its accessor.
def initialize
  # Where stored programs live, and how many are kept.
  @storage_path = "./dspy_storage"
  @max_stored_programs = 100

  # Feature switches: only automatic saving starts enabled.
  @auto_save = true
  @save_intermediate_results = false
  @compress_old_programs = false
end
40
+
41
+ sig { returns(T::Hash[Symbol, T.untyped]) }
42
# Snapshot the current settings as a Hash keyed by setting name.
#
# The keys are, in order: :storage_path, :auto_save,
# :save_intermediate_results, :max_stored_programs, :compress_old_programs.
def to_h
  setting_names = %i[
    storage_path
    auto_save
    save_intermediate_results
    max_stored_programs
    compress_old_programs
  ]

  # Each setting is stored in the instance variable of the same name.
  setting_names.each_with_object({}) do |setting, snapshot|
    snapshot[setting] = instance_variable_get(:"@#{setting}")
  end
end
51
+ end
52
+
53
+ sig { returns(StorageConfig) }
54
+ attr_reader :config
55
+
56
+ sig { returns(ProgramStorage) }
57
+ attr_reader :storage
58
+
59
+ sig { params(config: T.nilable(StorageConfig)).void }
60
# Set up the manager and the ProgramStorage it delegates to.
#
# config - StorageConfig to use; a fresh StorageConfig (with its defaults)
#          is created when none is given.
def initialize(config: nil)
  effective_config = config || StorageConfig.new
  @config = effective_config

  # The storage backend is pointed at the configured path and asked to
  # create directories.
  @storage = ProgramStorage.new(
    storage_path: effective_config.storage_path,
    create_directories: true
  )
end
67
+
68
+ # Save optimization result from teleprompter
69
+ sig do
70
+ params(
71
+ optimization_result: T.untyped,
72
+ tags: T::Array[String],
73
+ description: T.nilable(String),
74
+ metadata: T::Hash[Symbol, T.untyped]
75
+ ).returns(T.nilable(ProgramStorage::SavedProgram))
76
+ end
77
# Persist the optimized program carried by an optimization result.
#
# optimization_result - object expected to respond to `optimized_program`,
#                       and optionally to `metadata`.
# tags                - tags recorded in the saved metadata.
# description         - optional description recorded in the saved metadata.
# metadata            - caller metadata; the keys :tags, :description,
#                       :optimizer_class, :saved_by and :optimization_timestamp
#                       are overwritten by the values computed here.
#
# Returns nil when auto-save is disabled in the config, when the result does
# not respond to `optimized_program`, or when that program is nil/false.
# Otherwise returns whatever `@storage.save_program` returns.
def save_optimization_result(optimization_result, tags: [], description: nil, metadata: {})
  return nil unless @config.auto_save
  return nil unless optimization_result.respond_to?(:optimized_program)

  program = optimization_result.optimized_program
  return nil unless program

  # A result may respond to `metadata` and still return nil from it; indexing
  # that nil used to raise NoMethodError and abort the save.
  result_metadata = optimization_result.respond_to?(:metadata) ? optimization_result.metadata : nil
  optimization_timestamp = result_metadata ? result_metadata[:optimization_timestamp] : nil

  enhanced_metadata = metadata.merge({
    tags: tags,
    description: description,
    optimizer_class: optimization_result.class.name,
    saved_by: "StorageManager",
    optimization_timestamp: optimization_timestamp
  })

  @storage.save_program(program, optimization_result, metadata: enhanced_metadata)
end
99
+
100
+ # Find programs by criteria
101
+ sig do
102
+ params(
103
+ optimizer: T.nilable(String),
104
+ min_score: T.nilable(Float),
105
+ max_age_days: T.nilable(Integer),
106
+ tags: T::Array[String],
107
+ signature_class: T.nilable(String)
108
+ ).returns(T::Array[T::Hash[Symbol, T.untyped]])
109
+ end
110
# List stored programs that satisfy every given criterion.
#
# optimizer       - keep entries whose :optimizer equals this value.
# min_score       - keep entries whose :best_score is at least this value;
#                   a missing score counts as 0.
# max_age_days    - keep entries whose :saved_at is no older than this many
#                   days. Entries whose :saved_at is missing or cannot be
#                   parsed are excluded while this filter is active.
# tags            - when non-empty, keep entries sharing at least one tag with
#                   the entry's metadata tags.
# signature_class - keep entries whose :signature_class equals this value.
#
# Criteria left at their defaults are not applied. Returns the matching
# entries from `@storage.list_programs`, in their original order.
def find_programs(optimizer: nil, min_score: nil, max_age_days: nil, tags: [], signature_class: nil)
  # Computed once so every entry is measured against the same instant.
  oldest_allowed = max_age_days && (Time.now - (max_age_days * 24 * 60 * 60))

  @storage.list_programs.select do |program|
    next false if optimizer && program[:optimizer] != optimizer
    next false if min_score && (program[:best_score] || 0) < min_score
    next false if signature_class && program[:signature_class] != signature_class

    if oldest_allowed
      # One entry with a bad timestamp must not abort the whole search; its
      # age cannot be verified, so it is treated as not matching.
      saved_at = begin
        Time.parse(program[:saved_at].to_s)
      rescue ArgumentError
        nil
      end
      next false if saved_at.nil? || saved_at < oldest_allowed
    end

    next true if tags.empty?

    program_tags = program.dig(:metadata, :tags) || []
    (tags & program_tags).any?
  end
end
139
+
140
+ # Get the best performing program for a signature class
141
+ sig { params(signature_class: String).returns(T.nilable(ProgramStorage::SavedProgram)) }
142
# Load the highest-scoring stored program for a signature class.
#
# Entries without a :best_score are ranked as if their score were 0.
# Returns nil when no stored program matches the signature class; otherwise
# returns the result of `@storage.load_program` for the winning entry.
def get_best_program(signature_class)
  candidates = find_programs(signature_class: signature_class)
  top_entry = candidates.max_by { |entry| entry[:best_score] || 0 }

  # max_by yields nil for an empty candidate list.
  top_entry ? @storage.load_program(top_entry[:program_id]) : nil
end
149
+
150
+ # Create a checkpoint from current optimization state
151
+ sig do
152
+ params(
153
+ optimization_result: T.untyped,
154
+ checkpoint_name: String,
155
+ metadata: T::Hash[Symbol, T.untyped]
156
+ ).returns(T.nilable(ProgramStorage::SavedProgram))
157
+ end
158
# Save an optimization result as a named checkpoint.
#
# optimization_result - forwarded unchanged to `save_optimization_result`.
# checkpoint_name     - name stored in the metadata and in the description.
# metadata            - caller metadata; :checkpoint, :checkpoint_name and
#                       :created_at are overwritten by the values set here.
#
# The save is tagged "checkpoint" and delegated to
# `save_optimization_result`, whose return value is returned as-is.
def create_checkpoint(optimization_result, checkpoint_name, metadata: {})
  checkpoint_fields = {
    checkpoint: true,
    checkpoint_name: checkpoint_name,
    created_at: Time.now.iso8601
  }

  save_optimization_result(
    optimization_result,
    tags: ["checkpoint"],
    description: "Checkpoint: #{checkpoint_name}",
    metadata: metadata.merge(checkpoint_fields)
  )
end
172
+
173
+ # Restore from a checkpoint
174
+ sig { params(checkpoint_name: String).returns(T.nilable(ProgramStorage::SavedProgram)) }
175
# Load the stored program that was saved under a checkpoint name.
#
# Only entries tagged "checkpoint" are considered. The name is looked up
# first on the entry itself and then inside its nested :metadata. When
# several entries match, the first one in listing order is used.
#
# Returns nil when no checkpoint has that name; otherwise returns the result
# of `@storage.load_program` for the matching entry.
def restore_checkpoint(checkpoint_name)
  candidates = find_programs(tags: ["checkpoint"])

  match = candidates.find do |entry|
    next true if entry[:checkpoint_name] == checkpoint_name

    entry.dig(:metadata, :checkpoint_name) == checkpoint_name
  end

  match ? @storage.load_program(match[:program_id]) : nil
end
186
+
187
+ # Get optimization history and trends
188
+ sig { returns(T::Hash[Symbol, T.untyped]) }
189
# Return the stored history enriched with per-optimizer statistics and a
# score trend.
#
# When the history has no programs it is returned unchanged. Otherwise the
# result is the history merged with:
#   :optimizer_stats - per :optimizer value: :count, :avg_score, :best_score
#                      and :latest (the entry with the newest :saved_at).
#   :trends          - :improvement_percentage, :recent_avg_score and
#                      :older_avg_score.
#
# Averages use only entries that have a :best_score; a group with no scores
# at all averages to 0.0 (its :best_score stays nil).
def get_optimization_history
  history = @storage.get_history

  programs = history[:programs] || []
  return history if programs.empty?

  # Entries without a score are left out of both the sum and the divisor.
  # Dividing by the full entry count dragged averages down, and an all-nil
  # group produced NaN.
  mean_score = lambda do |entries|
    scores = entries.map { |entry| entry[:best_score] }.compact
    scores.empty? ? 0.0 : scores.sum.to_f / scores.size
  end
  saved_time = ->(entry) { Time.parse(entry[:saved_at]) }

  optimizer_stats = programs.group_by { |entry| entry[:optimizer] }.transform_values do |entries|
    {
      count: entries.size,
      avg_score: mean_score.call(entries),
      best_score: entries.map { |entry| entry[:best_score] }.compact.max,
      latest: entries.max_by(&saved_time)
    }
  end

  # Trend: the ten newest programs versus everything before them. With ten or
  # fewer programs there is no earlier group, so the single oldest program
  # serves as the baseline.
  chronological = programs.sort_by(&saved_time)
  recent_avg = mean_score.call(chronological.last(10))
  older_avg = mean_score.call(chronological.first([chronological.size - 10, 1].max))

  # A non-positive baseline cannot be expressed as a percentage change.
  improvement_trend = older_avg > 0 ? ((recent_avg - older_avg) / older_avg * 100).round(2) : 0

  history.merge({
    optimizer_stats: optimizer_stats,
    trends: {
      improvement_percentage: improvement_trend,
      recent_avg_score: recent_avg.round(4),
      older_avg_score: older_avg.round(4)
    }
  })
end
226
+
227
+ # Clean up old programs based on configuration
228
+ sig { returns(Integer) }
229
# Delete stored programs beyond the configured `max_stored_programs` limit.
#
# Nothing happens (and 0 is returned) when the limit is not positive or the
# number of stored programs does not exceed it. Otherwise programs are
# ranked, the top `max_stored_programs` are kept, deletion is attempted for
# the rest, and a 'dspy.storage.cleanup' instrumentation event is emitted.
#
# Returns the number of programs for which `@storage.delete_program`
# reported success.
def cleanup_old_programs
  limit = @config.max_stored_programs
  return 0 unless limit > 0

  programs = @storage.list_programs
  return 0 if programs.size <= limit

  # Rank best-first by a blend of score (factor 0.7) and save time (factor
  # 0.3). The time term is epoch seconds / 1_000_000.
  # NOTE(review): score and time are on different scales, so the factors are
  # not a true 70/30 split; confirm the intended balance.
  ranked = programs.sort_by do |program|
    score_rank = program[:best_score] || 0
    time_rank = Time.parse(program[:saved_at]).to_f / 1_000_000

    # Negated so higher-ranked programs sort first.
    -(score_rank * 0.7 + time_rank * 0.3)
  end

  deleted_count = ranked.drop(limit).count do |program|
    @storage.delete_program(program[:program_id])
  end

  DSPy::Instrumentation.emit('dspy.storage.cleanup', {
    deleted_count: deleted_count,
    # Derived from what was actually deleted: a failed delete leaves more
    # than `limit` programs behind.
    remaining_count: programs.size - deleted_count,
    timestamp: Time.now.iso8601
  })

  deleted_count
end
261
+
262
+ # Compare two programs
263
+ sig do
264
+ params(
265
+ program_id_1: String,
266
+ program_id_2: String
267
+ ).returns(T.nilable(T::Hash[Symbol, T.untyped]))
268
+ end
269
# Compare two stored programs side by side.
#
# Both programs are loaded through `@storage.load_program`; nil is returned
# when either one cannot be loaded. Otherwise returns a Hash with:
#   :program_1 / :program_2 - :id, :score (:best_score_value of the
#                             optimization result), :optimizer and :saved_at
#                             (ISO 8601 string) of each program.
#   :comparison             - :score_difference (first minus second, missing
#                             scores counted as 0), :better_program (the id
#                             with the strictly higher score; the second id on
#                             a tie) and :age_difference_hours (first
#                             `saved_at` minus second, rounded to 2 places).
def compare_programs(program_id_1, program_id_2)
  first = @storage.load_program(program_id_1)
  second = @storage.load_program(program_id_2)
  return nil unless first && second

  summarize = lambda do |saved|
    result = saved.optimization_result
    {
      id: saved.program_id,
      score: result[:best_score_value],
      optimizer: result[:metadata]&.dig(:optimizer),
      saved_at: saved.saved_at.iso8601
    }
  end

  first_summary = summarize.call(first)
  second_summary = summarize.call(second)

  # Missing scores are treated as 0 for the comparison only; the summaries
  # above keep the raw (possibly nil) value.
  first_score = first_summary[:score] || 0
  second_score = second_summary[:score] || 0

  {
    program_1: first_summary,
    program_2: second_summary,
    comparison: {
      score_difference: first_score - second_score,
      better_program: first_score > second_score ? program_id_1 : program_id_2,
      age_difference_hours: ((first.saved_at - second.saved_at) / 3600).round(2)
    }
  }
end
298
+
299
+ # Global storage instance
300
+ @@instance = T.let(nil, T.nilable(StorageManager))
301
+
302
+ # Get global storage instance
303
+ sig { returns(StorageManager) }
304
# Return the shared StorageManager, creating it with the default
# configuration the first time it is requested.
def self.instance
  @@instance = new unless @@instance
  @@instance
end
307
+
308
+ # Configure global storage
309
+ sig { params(config: StorageConfig).void }
310
# Replace the shared StorageManager with a new one built from `config`.
def self.configure(config)
  replacement = new(config: config)
  @@instance = replacement
end
313
+
314
+ # Shorthand methods for common operations
315
+ sig { params(optimization_result: T.untyped, metadata: T::Hash[Symbol, T.untyped]).returns(T.nilable(ProgramStorage::SavedProgram)) }
316
# Shorthand: save an optimization result through the shared instance.
# Returns whatever the instance's `save_optimization_result` returns.
def self.save(optimization_result, metadata: {})
  manager = instance
  manager.save_optimization_result(optimization_result, metadata: metadata)
end
319
+
320
+ sig { params(program_id: String).returns(T.nilable(ProgramStorage::SavedProgram)) }
321
# Shorthand: load a stored program by id through the shared instance's
# storage. Returns whatever that storage's `load_program` returns.
def self.load(program_id)
  backing_store = instance.storage
  backing_store.load_program(program_id)
end
324
+
325
+ sig { params(signature_class: String).returns(T.nilable(ProgramStorage::SavedProgram)) }
326
# Shorthand: fetch the best stored program for a signature class through the
# shared instance. Returns whatever its `get_best_program` returns.
def self.best(signature_class)
  manager = instance
  manager.get_best_program(signature_class)
end
329
+ end
330
+ end
331
+ end