dspy 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,538 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sorbet-runtime'
4
+
5
+ begin
6
+ require 'opentelemetry/api'
7
+ require 'opentelemetry/sdk'
8
+ require 'opentelemetry/exporter/otlp'
9
+ rescue LoadError
10
+ # OpenTelemetry is optional - will be no-op if not available
11
+ end
12
+
13
+ module DSPy
14
+ module Subscribers
15
+ # OpenTelemetry subscriber that creates spans and metrics for DSPy operations
16
+ # Provides comprehensive tracing for optimization operations and LM calls
17
+ class OtelSubscriber
18
+ extend T::Sig
19
+
20
+ # Configuration for OpenTelemetry integration
21
+ class OtelConfig
22
+ extend T::Sig
23
+
24
+ sig { returns(T::Boolean) }
25
+ attr_accessor :enabled
26
+
27
+ sig { returns(String) }
28
+ attr_accessor :service_name
29
+
30
+ sig { returns(String) }
31
+ attr_accessor :service_version
32
+
33
+ sig { returns(T.nilable(String)) }
34
+ attr_accessor :endpoint
35
+
36
+ sig { returns(T::Hash[String, String]) }
37
+ attr_accessor :headers
38
+
39
+ sig { returns(T::Boolean) }
40
+ attr_accessor :trace_optimization_events
41
+
42
+ sig { returns(T::Boolean) }
43
+ attr_accessor :trace_lm_events
44
+
45
+ sig { returns(T::Boolean) }
46
+ attr_accessor :export_metrics
47
+
48
+ sig { returns(Float) }
49
+ attr_accessor :sample_rate
50
+
51
+ sig { void }
52
# Builds the configuration from the standard OTEL_* environment variables.
#
# Telemetry is enabled only when the OpenTelemetry constant is loaded and
# OTEL_EXPORTER_OTLP_ENDPOINT holds a non-blank value.
def initialize
  endpoint = ENV['OTEL_EXPORTER_OTLP_ENDPOINT']
  # A blank endpoint cannot be exported to, so treat it like an unset one.
  endpoint = nil if endpoint && endpoint.strip.empty?

  @enabled = !!(defined?(OpenTelemetry) && endpoint)
  @service_name = ENV.fetch('OTEL_SERVICE_NAME', 'dspy-ruby')
  @service_version = begin
    # Block form: DSPy::VERSION is only evaluated when the variable is unset,
    # so an explicit OTEL_SERVICE_VERSION is never discarded by a NameError.
    ENV.fetch('OTEL_SERVICE_VERSION') { DSPy::VERSION }
  rescue StandardError
    '1.0.0'
  end
  @endpoint = endpoint
  @headers = parse_headers(ENV['OTEL_EXPORTER_OTLP_HEADERS'])
  @trace_optimization_events = true
  @trace_lm_events = true
  @export_metrics = true

  # Parse strictly: an unparsable value falls back to the 1.0 default instead
  # of silently becoming 0.0, and the result is kept inside 0.0..1.0.
  rate = Float(ENV.fetch('OTEL_TRACE_SAMPLE_RATE', '1.0'), exception: false)
  @sample_rate = rate ? rate.clamp(0.0, 1.0) : 1.0
end
67
+
68
+ private
69
+
70
+ sig { params(headers_str: T.nilable(String)).returns(T::Hash[String, String]) }
71
# Parses a comma-separated list of key=value pairs (the format of
# OTEL_EXPORTER_OTLP_HEADERS) into a hash.
#
# headers_str - the raw string, or nil.
#
# Returns a Hash of stripped header names to stripped values. A pair without
# '=' maps to an empty string. Segments whose name is blank (for example the
# empty segment produced by "a=b,,c=d") are skipped instead of raising.
def parse_headers(headers_str)
  return {} unless headers_str

  headers_str.split(',').each_with_object({}) do |pair, parsed|
    # Limit the split to 2 so '=' characters inside the value survive.
    name, value = pair.split('=', 2)
    name = name.to_s.strip
    next if name.empty?

    parsed[name] = value.to_s.strip
  end
end
79
+ end
80
+
81
+ sig { returns(OtelConfig) }
82
+ attr_reader :config
83
+
84
+ sig { params(config: T.nilable(OtelConfig)).void }
85
# Creates the subscriber.
#
# config - optional OtelConfig; a fresh OtelConfig is built when omitted.
#
# OpenTelemetry is configured only when the config is enabled; event
# subscriptions are then attempted unconditionally.
def initialize(config: nil)
  # Registries for spans that stay open between separate start and
  # complete events.
  @optimization_spans = T.let({}, T::Hash[String, T.untyped])
  @trial_spans = T.let({}, T::Hash[String, T.untyped])
  @tracer = T.let(nil, T.nilable(T.untyped))
  @meter = T.let(nil, T.nilable(T.untyped))
  @config = config || OtelConfig.new

  setup_opentelemetry if @config.enabled
  setup_event_subscriptions
end
95
+
96
+ private
97
+
98
+ sig { void }
99
# Configures the OpenTelemetry SDK and obtains the tracer (and, when
# available, the meter) used by the event handlers.
#
# Any failure while configuring tracing is reported with `warn` and disables
# the subscriber. A failure that only affects metrics leaves tracing intact.
def setup_opentelemetry
  return unless defined?(OpenTelemetry)

  OpenTelemetry::SDK.configure do |c|
    c.service_name = @config.service_name
    c.service_version = @config.service_version

    if @config.endpoint
      exporter = OpenTelemetry::Exporter::OTLP::Exporter.new(
        endpoint: @config.endpoint,
        headers: @config.headers
      )
      c.add_span_processor(
        OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor.new(exporter)
      )
    end
  end

  version = begin
    DSPy::VERSION
  rescue StandardError
    '1.0.0'
  end

  @tracer = OpenTelemetry.tracer_provider.tracer('dspy-ruby', version)

  if @config.export_metrics
    begin
      # OpenTelemetry.meter_provider is only defined once the separate
      # metrics API gem has been loaded; without it, run with tracing only.
      if OpenTelemetry.respond_to?(:meter_provider)
        # MeterProvider#meter takes the version as a keyword argument.
        @meter = OpenTelemetry.meter_provider.meter('dspy-ruby', version: version)
      end
    rescue StandardError => error
      warn "Failed to setup OpenTelemetry metrics: #{error.message}"
      @meter = nil
    end
  end
rescue => error
  warn "Failed to setup OpenTelemetry: #{error.message}"
  @tracer = nil
  @meter = nil
  @config.enabled = false
end
131
+
132
+ sig { void }
133
# Registers the event subscriptions. Does nothing unless telemetry is
# enabled and a tracer was obtained.
#
# Optimization and LM subscriptions follow their config flags; storage and
# registry subscriptions are always registered.
def setup_event_subscriptions
  return if !@config.enabled || !@tracer

  setup_optimization_subscriptions if @config.trace_optimization_events
  setup_lm_subscriptions if @config.trace_lm_events

  setup_storage_subscriptions
  setup_registry_subscriptions
end
150
+
151
+ sig { void }
152
# Subscribes one handler method to each optimization lifecycle event.
def setup_optimization_subscriptions
  {
    'dspy.optimization.start' => :handle_optimization_start,
    'dspy.optimization.complete' => :handle_optimization_complete,
    'dspy.optimization.trial_start' => :handle_trial_start,
    'dspy.optimization.trial_complete' => :handle_trial_complete,
    'dspy.optimization.bootstrap_start' => :handle_bootstrap_start,
    'dspy.optimization.bootstrap_complete' => :handle_bootstrap_complete,
    'dspy.optimization.error' => :handle_optimization_error
  }.each do |event_name, handler|
    # send is used because the handlers are private.
    DSPy::Instrumentation.subscribe(event_name) { |event| send(handler, event) }
  end
end
181
+
182
+ sig { void }
183
# Subscribes the LM request, predict and chain-of-thought handlers.
def setup_lm_subscriptions
  {
    'dspy.lm.request' => :handle_lm_request,
    'dspy.predict' => :handle_prediction,
    'dspy.chain_of_thought' => :handle_chain_of_thought
  }.each do |event_name, handler|
    # send is used because the handlers are private.
    DSPy::Instrumentation.subscribe(event_name) { |event| send(handler, event) }
  end
end
196
+
197
+ sig { void }
198
# Subscribes the storage handler to the save and load start events, passing
# the operation name along with each event.
def setup_storage_subscriptions
  {
    'dspy.storage.save_start' => 'save',
    'dspy.storage.load_start' => 'load'
  }.each do |event_name, operation|
    DSPy::Instrumentation.subscribe(event_name) do |event|
      handle_storage_operation(event, operation)
    end
  end
end
207
+
208
+ sig { void }
209
# Subscribes the registry handler to the register, deploy and rollback start
# events, passing the operation name along with each event.
def setup_registry_subscriptions
  {
    'dspy.registry.register_start' => 'register',
    'dspy.registry.deploy_start' => 'deploy',
    'dspy.registry.rollback_start' => 'rollback'
  }.each do |event_name, operation|
    DSPy::Instrumentation.subscribe(event_name) do |event|
      handle_registry_operation(event, operation)
    end
  end
end
222
+
223
+ # Optimization event handlers
224
+ sig { params(event: T.untyped).void }
225
# Opens a 'dspy.optimization' span and stores it in @optimization_spans under
# the optimization id so a later event can finish it.
#
# event - object responding to #payload with a Hash of symbol keys.
#
# When the payload has no :optimization_id a random UUID is generated.
# NOTE(review): a generated id is not known to the emitter, so later events
# presumably cannot look this span up again - confirm emitters always send
# :optimization_id.
def handle_optimization_start(event)
  return unless @tracer

  payload = event.payload
  optimizer = payload[:optimizer] || 'unknown'
  optimization_id = payload[:optimization_id] || begin
    # Loaded lazily: nothing else in this file requires securerandom.
    require 'securerandom'
    SecureRandom.uuid
  end

  # Span attribute values must be non-nil, so absent payload fields are dropped.
  attributes = {
    'dspy.operation' => 'optimization',
    'dspy.optimization.id' => optimization_id,
    'dspy.optimization.optimizer' => optimizer,
    'dspy.optimization.trainset_size' => payload[:trainset_size],
    'dspy.optimization.valset_size' => payload[:valset_size],
    'dspy.optimization.config' => payload[:config]&.to_s
  }.compact

  @optimization_spans[optimization_id] =
    @tracer.start_span('dspy.optimization', attributes: attributes)

  return unless @meter

  @meter.create_counter(
    'dspy.optimization.started',
    description: 'Number of optimizations started'
  ).add(1, attributes: { 'optimizer' => optimizer })
end
255
+
256
+ sig { params(event: T.untyped).void }
257
# Finishes the span stored for the payload's :optimization_id, tagging it as
# successful, and records duration/score histograms when a meter exists.
#
# event - object responding to #payload with a Hash of symbol keys.
#
# Does nothing when no span is stored for the id.
def handle_optimization_complete(event)
  return unless @tracer

  payload = event.payload
  span = @optimization_spans.delete(payload[:optimization_id])
  return unless span

  # Attribute values must be non-nil, so only fields present in the payload
  # are written to the span.
  {
    'dspy.optimization.status' => 'success',
    'dspy.optimization.duration_ms' => payload[:duration_ms],
    'dspy.optimization.best_score' => payload[:best_score],
    'dspy.optimization.trials_count' => payload[:trials_count],
    # Long instructions are truncated to 500 characters.
    'dspy.optimization.final_instruction' => payload[:final_instruction]&.slice(0, 500)
  }.each do |key, value|
    span.set_attribute(key, value) unless value.nil?
  end

  span.finish

  return unless @meter && payload[:duration_ms]

  optimizer = payload[:optimizer] || 'unknown'

  @meter.create_histogram(
    'dspy.optimization.duration',
    description: 'Optimization duration in milliseconds'
  ).record(payload[:duration_ms], attributes: {
    'optimizer' => optimizer,
    'status' => 'success'
  })

  return unless payload[:best_score]

  @meter.create_histogram(
    'dspy.optimization.score',
    description: 'Best optimization score achieved'
  ).record(payload[:best_score], attributes: { 'optimizer' => optimizer })
end
294
+
295
+ sig { params(event: T.untyped).void }
296
# Opens a 'dspy.optimization.trial' span and stores it in @trial_spans under
# "<optimization_id>_<trial_number>".
#
# event - object responding to #payload with a Hash of symbol keys.
def handle_trial_start(event)
  return unless @tracer

  payload = event.payload
  trial_id = "#{payload[:optimization_id]}_#{payload[:trial_number]}"

  # Attribute values must be non-nil, so absent payload fields are dropped.
  attributes = {
    'dspy.operation' => 'optimization_trial',
    'dspy.trial.id' => trial_id,
    'dspy.trial.number' => payload[:trial_number],
    # Long instructions are truncated to 200 characters.
    'dspy.trial.instruction' => payload[:instruction]&.slice(0, 200),
    'dspy.trial.examples_count' => payload[:examples_count]
  }.compact

  @trial_spans[trial_id] =
    @tracer.start_span('dspy.optimization.trial', attributes: attributes)
end
315
+
316
+ sig { params(event: T.untyped).void }
317
# Finishes the span stored for "<optimization_id>_<trial_number>", recording
# the trial's status, duration, score and error details.
#
# event - object responding to #payload with a Hash of symbol keys.
#
# Does nothing when no span is stored for the trial.
def handle_trial_complete(event)
  return unless @tracer

  payload = event.payload
  trial_id = "#{payload[:optimization_id]}_#{payload[:trial_number]}"
  span = @trial_spans.delete(trial_id)
  return unless span

  error = payload[:error_message]

  span.set_attribute('dspy.trial.status', payload[:status] || 'success')
  # Attribute values must be non-nil, so optional fields are set only when present.
  span.set_attribute('dspy.trial.duration_ms', payload[:duration_ms]) unless payload[:duration_ms].nil?
  span.set_attribute('dspy.trial.score', payload[:score]) if payload[:score]
  span.set_attribute('dspy.trial.error', error.to_s) if error

  if payload[:status] == 'error'
    # record_exception expects an exception object (it reads its class and
    # message), so a plain message string is wrapped first.
    exception = error.is_a?(Exception) ? error : StandardError.new((error || 'Unknown error').to_s)
    span.record_exception(exception)
    span.status = OpenTelemetry::Trace::Status.error('Trial failed')
  end

  span.finish
end
338
+
339
+ sig { params(event: T.untyped).void }
340
# Emits a 'dspy.optimization.bootstrap' span describing the bootstrap that is
# starting.
#
# event - object responding to #payload with a Hash of symbol keys.
#
# The block given to in_span is empty, so the span is opened and finished
# immediately: it marks the start of the bootstrap, not its duration.
def handle_bootstrap_start(event)
  return unless @tracer

  payload = event.payload

  # Attribute values must be non-nil, so absent payload fields are dropped.
  attributes = {
    'dspy.operation' => 'bootstrap',
    'dspy.bootstrap.target_count' => payload[:target_count],
    'dspy.bootstrap.trainset_size' => payload[:trainset_size]
  }.compact

  @tracer.in_span('dspy.optimization.bootstrap', attributes: attributes) do |_span|
    # Nothing to do; the span finishes as soon as this block returns.
  end
end
356
+
357
+ sig { params(event: T.untyped).void }
358
# Intentionally a no-op.
#
# event - the bootstrap_complete event; currently ignored.
def handle_bootstrap_complete(event)
  # There is no open span to finish here: the bootstrap span is created and
  # closed within handle_bootstrap_start (in_span with an empty block), so
  # completion data carried by this event is not recorded.
end
361
+
362
+ sig { params(event: T.untyped).void }
363
# Marks the span stored for the payload's :optimization_id as failed and
# finishes it, then counts the error when a meter exists.
#
# event - object responding to #payload with a Hash of symbol keys.
#
# The error counter is incremented even when no span is stored for the id.
def handle_optimization_error(event)
  return unless @tracer

  payload = event.payload
  span = @optimization_spans.delete(payload[:optimization_id])

  if span
    error = payload[:error_message]

    span.set_attribute('dspy.optimization.status', 'error')
    # Attribute values must be non-nil, so the message is set only when present.
    span.set_attribute('dspy.optimization.error', error.to_s) if error
    # record_exception expects an exception object (it reads its class and
    # message), so a plain message string is wrapped first.
    exception = error.is_a?(Exception) ? error : StandardError.new((error || 'Unknown optimization error').to_s)
    span.record_exception(exception)
    span.status = OpenTelemetry::Trace::Status.error('Optimization failed')
    span.finish
  end

  return unless @meter

  @meter.create_counter(
    'dspy.optimization.errors',
    description: 'Number of optimization errors'
  ).add(1, attributes: {
    'optimizer' => payload[:optimizer] || 'unknown',
    'error_type' => payload[:error_type] || 'unknown'
  })
end
389
+
390
+ # LM event handlers
391
+ sig { params(event: T.untyped).void }
392
# Emits a 'dspy.lm.request' span for an LM call and, when a meter exists,
# records duration, token and cost histograms.
#
# event - object responding to #payload with a Hash of symbol keys.
#
# The model name is taken from :gen_ai_request_model, falling back to :model.
def handle_lm_request(event)
  return unless @tracer

  payload = event.payload
  provider = payload[:provider]
  model = payload[:gen_ai_request_model] || payload[:model]
  status = payload[:status]

  # Attribute values must be non-nil, so absent payload fields are dropped.
  span_attributes = {
    'dspy.operation' => 'lm_request',
    'dspy.lm.provider' => provider,
    'dspy.lm.model' => model,
    'dspy.lm.status' => status,
    'dspy.lm.duration_ms' => payload[:duration_ms],
    'dspy.lm.tokens_total' => payload[:tokens_total],
    'dspy.lm.tokens_input' => payload[:tokens_input],
    'dspy.lm.tokens_output' => payload[:tokens_output],
    'dspy.lm.cost' => payload[:cost]
  }.compact

  @tracer.in_span('dspy.lm.request', attributes: span_attributes) do |span|
    if status == 'error'
      error = payload[:error_message]
      # record_exception expects an exception object (it reads its class and
      # message), so a plain message string is wrapped first.
      exception = error.is_a?(Exception) ? error : StandardError.new((error || 'LM request failed').to_s)
      span.record_exception(exception)
      span.status = OpenTelemetry::Trace::Status.error('LM request failed')
    end

    next unless @meter

    model_attributes = { 'provider' => provider, 'model' => model }.compact

    if payload[:duration_ms]
      @meter.create_histogram(
        'dspy.lm.request.duration',
        description: 'LM request duration in milliseconds'
      ).record(
        payload[:duration_ms],
        attributes: model_attributes.merge({ 'status' => status }.compact)
      )
    end

    if payload[:tokens_total]
      @meter.create_histogram(
        'dspy.lm.tokens.total',
        description: 'Total tokens used in LM request'
      ).record(payload[:tokens_total], attributes: model_attributes)
    end

    if payload[:cost]
      @meter.create_histogram(
        'dspy.lm.cost',
        description: 'Cost of LM request'
      ).record(payload[:cost], attributes: model_attributes)
    end
  end
end
451
+
452
+ sig { params(event: T.untyped).void }
453
# Emits a 'dspy.predict' span for a prediction event, marking it as failed
# when the payload status is 'error'.
#
# event - object responding to #payload with a Hash of symbol keys.
def handle_prediction(event)
  return unless @tracer

  payload = event.payload

  # Attribute values must be non-nil, so absent payload fields are dropped.
  attributes = {
    'dspy.operation' => 'predict',
    'dspy.signature' => payload[:signature_class],
    'dspy.predict.status' => payload[:status],
    'dspy.predict.duration_ms' => payload[:duration_ms],
    'dspy.predict.input_size' => payload[:input_size]
  }.compact

  @tracer.in_span('dspy.predict', attributes: attributes) do |span|
    next unless payload[:status] == 'error'

    error = payload[:error_message]
    # record_exception expects an exception object (it reads its class and
    # message), so a plain message string is wrapped first.
    exception = error.is_a?(Exception) ? error : StandardError.new((error || 'Prediction failed').to_s)
    span.record_exception(exception)
    span.status = OpenTelemetry::Trace::Status.error('Prediction failed')
  end
end
474
+
475
+ sig { params(event: T.untyped).void }
476
# Emits a 'dspy.chain_of_thought' span for a chain-of-thought event, marking
# it as failed when the payload status is 'error'.
#
# event - object responding to #payload with a Hash of symbol keys.
def handle_chain_of_thought(event)
  return unless @tracer

  payload = event.payload

  # Attribute values must be non-nil, so absent payload fields are dropped.
  attributes = {
    'dspy.operation' => 'chain_of_thought',
    'dspy.signature' => payload[:signature_class],
    'dspy.cot.status' => payload[:status],
    'dspy.cot.duration_ms' => payload[:duration_ms],
    'dspy.cot.reasoning_steps' => payload[:reasoning_steps],
    'dspy.cot.reasoning_length' => payload[:reasoning_length]
  }.compact

  @tracer.in_span('dspy.chain_of_thought', attributes: attributes) do |span|
    next unless payload[:status] == 'error'

    error = payload[:error_message]
    # record_exception expects an exception object (it reads its class and
    # message), so a plain message string is wrapped first.
    exception = error.is_a?(Exception) ? error : StandardError.new((error || 'Chain of thought failed').to_s)
    span.record_exception(exception)
    span.status = OpenTelemetry::Trace::Status.error('Chain of thought failed')
  end
end
498
+
499
+ # Storage event handlers
500
+ sig { params(event: T.untyped, operation: String).void }
501
# Emits a "dspy.storage.<operation>" span for a storage event.
#
# event     - object responding to #payload with a Hash of symbol keys.
# operation - operation name used in the span name and 'dspy.operation'.
#
# The block given to in_span is empty, so the span finishes immediately.
def handle_storage_operation(event, operation)
  return unless @tracer

  payload = event.payload

  # Attribute values must be non-nil, so absent payload fields are dropped.
  attributes = {
    'dspy.operation' => "storage_#{operation}",
    'dspy.storage.program_id' => payload[:program_id],
    'dspy.storage.size_bytes' => payload[:size_bytes]
  }.compact

  @tracer.in_span("dspy.storage.#{operation}", attributes: attributes) do |_span|
    # Nothing to do; the span finishes as soon as this block returns.
  end
end
517
+
518
+ # Registry event handlers
519
+ sig { params(event: T.untyped, operation: String).void }
520
# Emits a "dspy.registry.<operation>" span for a registry event.
#
# event     - object responding to #payload with a Hash of symbol keys.
# operation - operation name used in the span name and 'dspy.operation'.
#
# The block given to in_span is empty, so the span finishes immediately.
def handle_registry_operation(event, operation)
  return unless @tracer

  payload = event.payload

  # Attribute values must be non-nil, so absent payload fields are dropped.
  attributes = {
    'dspy.operation' => "registry_#{operation}",
    'dspy.registry.signature_name' => payload[:signature_name],
    'dspy.registry.version' => payload[:version]
  }.compact

  @tracer.in_span("dspy.registry.#{operation}", attributes: attributes) do |_span|
    # Nothing to do; the span finishes as soon as this block returns.
  end
end
536
+ end
537
+ end
538
+ end
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sorbet-runtime'
4
+ require_relative '../example'
5
+
6
+ module DSPy
7
+ module Teleprompt
8
+ # Data handling for optimization with efficient operations
9
+ # Provides operations for large datasets during bootstrap and optimization
10
+ class DataHandler
11
+ extend T::Sig
12
+
13
+ sig { returns(T::Array[T.untyped]) }
14
+ attr_reader :examples
15
+
16
+ sig { params(examples: T::Array[T.untyped]).void }
17
# Wraps the given examples.
#
# examples - Array of examples; the array is stored as-is, not copied.
def initialize(examples)
  @examples = examples
end
20
+
21
+ # Sample examples efficiently
22
+ sig { params(n: Integer, random_state: T.nilable(Integer)).returns(T::Array[T.untyped]) }
23
# Returns up to n distinct examples chosen at random.
#
# n            - number of examples wanted; capped at the number available.
# random_state - optional Integer seed. The same seed yields the same sample.
#
# Returns an empty Array when there are no examples or n is not positive.
#
# A seeded call uses its own Random instance, so the process-wide generator
# is left untouched (no global `srand` side effect).
def sample(n, random_state: nil)
  return [] if @examples.empty? || n <= 0

  count = [n, @examples.size].min
  return @examples.sample(count) unless random_state

  @examples.sample(count, random: Random.new(random_state))
end
36
+
37
+ # Shuffle examples efficiently
38
+ sig { params(random_state: T.nilable(Integer)).returns(T::Array[T.untyped]) }
39
# Returns a shuffled copy of the examples; the stored array is not modified.
#
# random_state - optional Integer seed. The same seed yields the same order.
#
# A seeded call uses its own Random instance, so the process-wide generator
# is left untouched (no global `srand` side effect).
def shuffle(random_state: nil)
  return @examples.shuffle unless random_state

  @examples.shuffle(random: Random.new(random_state))
end
46
+
47
+ # Get examples in batches for processing
48
+ sig { params(batch_size: Integer).returns(T::Enumerator[T::Array[T.untyped]]) }
49
# Iterates over the examples in consecutive batches.
#
# batch_size - maximum number of examples per batch; the last batch may be
#              smaller.
#
# When a block is given, each batch is yielded to it (previously a block was
# silently ignored). An Enumerator over the batches is returned in both cases.
def each_batch(batch_size)
  batches = @examples.each_slice(batch_size)
  batches.each { |batch| yield batch } if block_given?
  batches
end
52
+
53
+ # Filter examples based on success/failure
54
+ sig { params(successful_indices: T::Array[Integer]).returns([T::Array[T.untyped], T::Array[T.untyped]]) }
55
# Splits the examples into successful and failed groups.
#
# successful_indices - indices into the examples that succeeded.
#
# Returns [successful_examples, failed_examples]. Successful examples follow
# the order (and repetitions) of successful_indices; failed examples keep
# their original order.
#
# Indices outside 0...size are ignored. In particular a negative index no
# longer wraps around to the end of the array, which used to place the same
# example in both groups.
def partition_by_success(successful_indices)
  valid_range = 0...@examples.size
  in_range = successful_indices.select { |index| valid_range.cover?(index) }

  successful_examples = in_range.map { |index| @examples[index] }
  failed_examples = (valid_range.to_a - in_range).map { |index| @examples[index] }

  [successful_examples, failed_examples]
end
62
+
63
+ # Create stratified samples maintaining distribution
64
+ sig { params(n: Integer, stratify_column: T.nilable(String)).returns(T::Array[T.untyped]) }
65
# Returns a sample of n examples.
#
# n               - number of examples wanted.
# stratify_column - accepted for the interface but currently unused.
#
# Stratification is not implemented yet; this delegates to plain random
# sampling.
def stratified_sample(n, stratify_column: nil)
  sample(n, random_state: nil)
end
69
+
70
+ # Get statistics about the data
71
+ sig { returns(T::Hash[Symbol, T.untyped]) }
72
# Summarizes the stored examples.
#
# Returns a Hash with:
#   :total_examples        - number of examples
#   :example_types         - names of the distinct example classes, in order
#                            of first appearance
#   :memory_usage_estimate - rough figure of 1000 per example
def statistics
  count = @examples.size
  distinct_classes = @examples.map { |example| example.class }.uniq

  {
    total_examples: count,
    example_types: distinct_classes.map { |klass| klass.name },
    memory_usage_estimate: count * 1000
  }
end
79
+
80
+ # Create multiple candidate sets efficiently
81
+ sig { params(num_sets: Integer, set_size: Integer, random_state: T.nilable(Integer)).returns(T::Array[T::Array[T.untyped]]) }
82
# Builds several independently sampled candidate sets.
#
# num_sets     - number of sets to create.
# set_size     - examples per set; capped at the number available.
# random_state - optional Integer seed. Set i is drawn with seed
#                random_state + i, so sets differ from each other but the
#                whole result is reproducible.
#
# Returns an Array of num_sets Arrays (all empty when there are no examples).
#
# Seeded sets use their own Random instances, so the process-wide generator
# is left untouched (no global `srand` side effect).
def create_candidate_sets(num_sets, set_size, random_state: nil)
  return Array.new(num_sets) { [] } if @examples.empty?

  size = [set_size, @examples.size].min

  (0...num_sets).map do |index|
    if random_state
      @examples.sample(size, random: Random.new(random_state + index))
    else
      @examples.sample(size)
    end
  end
end
105
+ end
106
+ end
107
+ end