dspy 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,669 @@
1
+ # frozen_string_literal: true
2
+
3
require 'securerandom'
require 'sorbet-runtime'

begin
  require 'langfuse'
rescue LoadError
  # Langfuse is optional - will be no-op if not available
end
10
+
11
+ module DSPy
12
+ module Subscribers
13
+ # Langfuse subscriber that provides comprehensive LLM observability for DSPy operations
14
+ # Tracks prompts, completions, optimization traces, and performance metrics
15
+ class LangfuseSubscriber
16
+ extend T::Sig
17
+
18
+ # Configuration for Langfuse integration
19
class LangfuseConfig
  extend T::Sig

  # Master switch; the constructor turns it on only when the Langfuse
  # constant is loaded and LANGFUSE_SECRET_KEY is set.
  sig { returns(T::Boolean) }
  attr_accessor :enabled

  # Credentials and endpoint, seeded from LANGFUSE_* environment variables.
  sig { returns(T.nilable(String)) }
  attr_accessor :public_key

  sig { returns(T.nilable(String)) }
  attr_accessor :secret_key

  sig { returns(T.nilable(String)) }
  attr_accessor :host

  # Toggles selecting which event families the subscriber listens to.
  sig { returns(T::Boolean) }
  attr_accessor :trace_optimizations

  sig { returns(T::Boolean) }
  attr_accessor :trace_lm_calls

  sig { returns(T::Boolean) }
  attr_accessor :trace_evaluations

  # Toggles controlling whether prompt / completion text is forwarded.
  sig { returns(T::Boolean) }
  attr_accessor :log_prompts

  sig { returns(T::Boolean) }
  attr_accessor :log_completions

  # When off, no "request_cost" score is recorded for LM requests.
  sig { returns(T::Boolean) }
  attr_accessor :calculate_costs

  # Tags merged into every trace the subscriber creates.
  sig { returns(T::Hash[String, T.untyped]) }
  attr_accessor :default_tags

  # Populates defaults: credentials from the environment, the Langfuse cloud
  # host when LANGFUSE_HOST is unset, and every feature toggle enabled.
  sig { void }
  def initialize
    secret = ENV['LANGFUSE_SECRET_KEY']

    @public_key = ENV['LANGFUSE_PUBLIC_KEY']
    @secret_key = secret
    @host = ENV['LANGFUSE_HOST'] || 'https://cloud.langfuse.com'
    @enabled = (defined?(Langfuse) && secret) ? true : false

    @trace_optimizations = true
    @trace_lm_calls = true
    @trace_evaluations = true

    @log_prompts = true
    @log_completions = true
    @calculate_costs = true

    @default_tags = { 'framework' => 'dspy-ruby' }
  end
end
70
+
71
+ sig { returns(LangfuseConfig) }
72
+ attr_reader :config
73
+
74
+ sig { params(config: T.nilable(LangfuseConfig)).void }
75
# Builds the subscriber. Uses the supplied config or a fresh LangfuseConfig,
# creates the Langfuse client when the config is enabled, then registers the
# instrumentation subscriptions.
def initialize(config: nil)
  # Bookkeeping for Langfuse objects, keyed by ids taken from event payloads.
  @lm_generations = T.let({}, T::Hash[String, T.untyped])
  @trial_spans = T.let({}, T::Hash[String, T.untyped])
  @optimization_traces = T.let({}, T::Hash[String, T.untyped])
  @langfuse = T.let(nil, T.nilable(T.untyped))
  @config = config || LangfuseConfig.new

  setup_langfuse if @config.enabled
  setup_event_subscriptions
end
85
+
86
+ private
87
+
88
+ sig { void }
89
# Instantiates the Langfuse client from the configured credentials.
# Does nothing when the Langfuse constant is not loaded or no secret key is
# configured. Any StandardError raised while constructing the client is
# reported via `warn` and the integration is disabled.
def setup_langfuse
  return if !defined?(Langfuse) || !@config.secret_key

  credentials = {
    public_key: @config.public_key,
    secret_key: @config.secret_key,
    host: @config.host
  }
  @langfuse = Langfuse.new(**credentials)
rescue => error
  warn "Failed to setup Langfuse: #{error.message}"
  @config.enabled = false
end
101
+
102
+ sig { void }
103
# Registers instrumentation subscriptions for each enabled event family.
# Skipped entirely unless the config is enabled and a client exists.
def setup_event_subscriptions
  return if !@config.enabled || !@langfuse

  setup_optimization_subscriptions if @config.trace_optimizations
  setup_lm_subscriptions if @config.trace_lm_calls
  setup_evaluation_subscriptions if @config.trace_evaluations

  # Registry events have no dedicated toggle; they are always subscribed.
  setup_context_subscriptions
end
124
+
125
+ sig { void }
126
# Routes each optimization lifecycle event to its handler method.
def setup_optimization_subscriptions
  {
    'dspy.optimization.start' => :handle_optimization_start,
    'dspy.optimization.complete' => :handle_optimization_complete,
    'dspy.optimization.trial_start' => :handle_trial_start,
    'dspy.optimization.trial_complete' => :handle_trial_complete,
    'dspy.optimization.error' => :handle_optimization_error
  }.each do |event_name, handler|
    DSPy::Instrumentation.subscribe(event_name) { |event| send(handler, event) }
  end
end
147
+
148
+ sig { void }
149
# Routes LM request, predict and chain-of-thought events to their handlers.
def setup_lm_subscriptions
  [
    ['dspy.lm.request', :handle_lm_request],
    ['dspy.predict', :handle_prediction],
    ['dspy.chain_of_thought', :handle_chain_of_thought]
  ].each do |event_name, handler|
    DSPy::Instrumentation.subscribe(event_name) { |event| send(handler, event) }
  end
end
162
+
163
+ sig { void }
164
# Routes evaluation start and batch-complete events to their handlers.
def setup_evaluation_subscriptions
  {
    'dspy.evaluation.start' => :handle_evaluation_start,
    'dspy.evaluation.batch_complete' => :handle_evaluation_complete
  }.each do |event_name, handler|
    DSPy::Instrumentation.subscribe(event_name) { |event| send(handler, event) }
  end
end
173
+
174
+ sig { void }
175
# Routes registry deployment and rollback events to their handlers.
def setup_context_subscriptions
  [
    ['dspy.registry.deploy_complete', :handle_deployment],
    ['dspy.registry.auto_deployment', :handle_auto_deployment],
    ['dspy.registry.automatic_rollback', :handle_automatic_rollback]
  ].each do |event_name, handler|
    DSPy::Instrumentation.subscribe(event_name) { |event| send(handler, event) }
  end
end
188
+
189
+ # Optimization event handlers
190
+ sig { params(event: T.untyped).void }
191
# Opens a Langfuse trace for an optimization run and logs an
# "optimization_started" event on it.
#
# The trace id is payload[:optimization_id], or a random UUID when the
# payload carries none. The trace is kept in @optimization_traces only when
# the id came from the payload: the completion and error handlers look
# traces up by payload[:optimization_id], so an entry stored under a
# generated id could never be found or removed and would accumulate forever.
def handle_optimization_start(event)
  return unless @langfuse

  payload = event.payload
  supplied_id = payload[:optimization_id]
  optimization_id = supplied_id || SecureRandom.uuid
  optimizer_label = payload[:optimizer] || 'unknown'

  trace = @langfuse.trace(
    id: optimization_id,
    name: "DSPy Optimization",
    metadata: {
      optimizer: optimizer_label,
      trainset_size: payload[:trainset_size],
      valset_size: payload[:valset_size],
      config: payload[:config]
    },
    tags: @config.default_tags.merge(
      'operation' => 'optimization',
      'optimizer' => optimizer_label
    )
  )

  @optimization_traces[optimization_id] = trace if supplied_id

  # Log optimization event
  @langfuse.event(
    trace_id: optimization_id,
    name: "optimization_started",
    metadata: {
      optimizer: payload[:optimizer],
      dataset_sizes: {
        train: payload[:trainset_size],
        validation: payload[:valset_size]
      }
    }
  )
end
227
+
228
+ sig { params(event: T.untyped).void }
229
# Closes out an optimization run: removes its trace from
# @optimization_traces, writes the final results onto the trace, logs an
# "optimization_completed" event, and records the best score when present.
# Does nothing if no trace is registered under payload[:optimization_id].
def handle_optimization_complete(event)
  return unless @langfuse

  payload = event.payload
  optimization_id = payload[:optimization_id]
  trace = @optimization_traces.delete(optimization_id)
  return unless trace

  best_score = payload[:best_score]
  trials_count = payload[:trials_count]
  duration_ms = payload[:duration_ms]

  trace.update(
    output: {
      best_score: best_score,
      trials_count: trials_count,
      final_instruction: payload[:final_instruction]
    },
    metadata: { duration_ms: duration_ms, status: 'success' }
  )

  @langfuse.event(
    trace_id: optimization_id,
    name: "optimization_completed",
    metadata: { best_score: best_score, trials_count: trials_count, duration_ms: duration_ms }
  )

  return unless best_score

  @langfuse.score(
    trace_id: optimization_id,
    name: "optimization_performance",
    value: best_score,
    comment: "Best optimization score achieved"
  )
end
272
+
273
+ sig { params(event: T.untyped).void }
274
# Opens a span for one optimization trial, remembers it in @trial_spans
# under "<optimization_id>_<trial_number>", and logs a "trial_started" event
# carrying the first 100 characters of the instruction.
def handle_trial_start(event)
  return unless @langfuse

  payload = event.payload
  optimization_id = payload[:optimization_id]
  trial_number = payload[:trial_number]
  instruction = payload[:instruction]

  @trial_spans["#{optimization_id}_#{trial_number}"] = @langfuse.span(
    trace_id: optimization_id,
    name: "Optimization Trial",
    input: {
      trial_number: trial_number,
      instruction: instruction,
      examples_count: payload[:examples_count]
    },
    metadata: { trial_number: trial_number }
  )

  @langfuse.event(
    trace_id: optimization_id,
    name: "trial_started",
    metadata: {
      trial_number: trial_number,
      instruction_preview: instruction&.slice(0, 100)
    }
  )
end
306
+
307
+ sig { params(event: T.untyped).void }
308
# Finishes a trial: removes its span from @trial_spans, writes score, status
# and duration onto the span, logs a "trial_completed" event, and records a
# "trial_score" when the payload carries a score. Status defaults to
# 'success'; an 'error' status marks the span at ERROR level.
# Does nothing if no span is registered for the trial.
def handle_trial_complete(event)
  return unless @langfuse

  payload = event.payload
  optimization_id = payload[:optimization_id]
  trial_number = payload[:trial_number]
  span = @trial_spans.delete("#{optimization_id}_#{trial_number}")
  return unless span

  status = payload[:status] || 'success'
  score = payload[:score]
  duration_ms = payload[:duration_ms]

  span.update(
    output: { score: score, status: status },
    metadata: { duration_ms: duration_ms, error: payload[:error_message] },
    level: status == 'error' ? 'ERROR' : 'INFO'
  )

  @langfuse.event(
    trace_id: optimization_id,
    name: "trial_completed",
    metadata: {
      trial_number: trial_number,
      score: score,
      status: status,
      duration_ms: duration_ms
    }
  )

  return unless score

  @langfuse.score(
    trace_id: optimization_id,
    name: "trial_score",
    value: score,
    comment: "Trial #{trial_number} score"
  )
end
355
+
356
+ sig { params(event: T.untyped).void }
357
# Records a failed optimization. If a trace is registered under
# payload[:optimization_id] it is removed and marked as an error; the
# "optimization_error" event is logged whether or not a trace was found.
def handle_optimization_error(event)
  return unless @langfuse

  payload = event.payload
  optimization_id = payload[:optimization_id]
  error_message = payload[:error_message]
  error_type = payload[:error_type]

  if (failed_trace = @optimization_traces.delete(optimization_id))
    failed_trace.update(
      output: { error: error_message, error_type: error_type },
      metadata: { status: 'error' },
      level: 'ERROR'
    )
  end

  @langfuse.event(
    trace_id: optimization_id,
    name: "optimization_error",
    metadata: {
      error_message: error_message,
      error_type: error_type,
      optimizer: payload[:optimizer]
    }
  )
end
388
+
389
+ # LM event handlers
390
+ sig { params(event: T.untyped).void }
391
# Reports one LM request to Langfuse as a generation plus an "lm_request"
# event, and a "request_cost" score when the payload carries a cost and
# cost tracking is enabled.
#
# Prompt and response text are forwarded only when log_prompts /
# log_completions are on. The generation is created from a single payload
# and is not retained: nothing in this class reads @lm_generations, so
# storing every generation there only accumulated prompts and responses
# for the lifetime of the subscriber.
def handle_lm_request(event)
  return unless @langfuse

  payload = event.payload
  model = payload[:gen_ai_request_model] || payload[:model]
  status = payload[:status]

  @langfuse.generation(
    name: "LM Request",
    model: model || 'unknown',
    input: @config.log_prompts ? payload[:prompt] : nil,
    output: @config.log_completions ? payload[:response] : nil,
    metadata: {
      provider: payload[:provider],
      status: status,
      duration_ms: payload[:duration_ms]
    },
    usage: build_usage_info(payload),
    level: status == 'error' ? 'ERROR' : 'INFO'
  )

  # Log LM request event
  @langfuse.event(
    name: "lm_request",
    metadata: {
      provider: payload[:provider],
      model: model,
      status: status,
      duration_ms: payload[:duration_ms],
      tokens_total: payload[:tokens_total],
      cost: payload[:cost]
    }
  )

  # Add cost information if available
  if payload[:cost] && @config.calculate_costs
    @langfuse.score(
      name: "request_cost",
      value: payload[:cost],
      comment: "Cost of LM request"
    )
  end
end
436
+
437
+ sig { params(event: T.untyped).void }
438
# Reports a prediction to Langfuse as a "DSPy Prediction" span plus a
# "prediction" event. An 'error' status marks the span at ERROR level.
# The span is not kept, so its return value is intentionally discarded.
def handle_prediction(event)
  return unless @langfuse

  payload = event.payload

  # Create span for prediction
  @langfuse.span(
    name: "DSPy Prediction",
    input: {
      signature: payload[:signature_class],
      input_size: payload[:input_size]
    },
    metadata: {
      signature_class: payload[:signature_class],
      status: payload[:status],
      duration_ms: payload[:duration_ms]
    },
    level: payload[:status] == 'error' ? 'ERROR' : 'INFO'
  )

  # Log prediction event
  @langfuse.event(
    name: "prediction",
    metadata: {
      signature: payload[:signature_class],
      status: payload[:status],
      duration_ms: payload[:duration_ms]
    }
  )
end
468
+
469
+ sig { params(event: T.untyped).void }
470
# Reports a chain-of-thought run to Langfuse as a "Chain of Thought" span
# plus a "chain_of_thought" event. An 'error' status marks the span at
# ERROR level. The span is not kept, so its return value is discarded.
def handle_chain_of_thought(event)
  return unless @langfuse

  payload = event.payload

  # Create span for chain of thought
  @langfuse.span(
    name: "Chain of Thought",
    input: {
      signature: payload[:signature_class]
    },
    output: {
      reasoning_steps: payload[:reasoning_steps],
      reasoning_length: payload[:reasoning_length]
    },
    metadata: {
      signature_class: payload[:signature_class],
      status: payload[:status],
      duration_ms: payload[:duration_ms]
    },
    level: payload[:status] == 'error' ? 'ERROR' : 'INFO'
  )

  # Log chain of thought event
  @langfuse.event(
    name: "chain_of_thought",
    metadata: {
      signature: payload[:signature_class],
      reasoning_steps: payload[:reasoning_steps],
      status: payload[:status],
      duration_ms: payload[:duration_ms]
    }
  )
end
504
+
505
+ # Evaluation event handlers
506
+ sig { params(event: T.untyped).void }
507
# Opens a "DSPy Evaluation" trace and logs an "evaluation_started" event on
# it. The trace id is payload[:evaluation_id], or a random UUID when the
# payload carries none. The trace object is not kept, so its return value
# is discarded.
#
# NOTE(review): the completion handler addresses the trace by
# payload[:evaluation_id]; with a generated id here, later events cannot be
# tied back to this trace unless the emitter supplies an id.
def handle_evaluation_start(event)
  return unless @langfuse

  payload = event.payload
  evaluation_id = payload[:evaluation_id] || SecureRandom.uuid

  # Create trace for evaluation
  @langfuse.trace(
    id: evaluation_id,
    name: "DSPy Evaluation",
    metadata: {
      dataset_size: payload[:dataset_size],
      metric_name: payload[:metric_name]
    },
    tags: @config.default_tags.merge(
      'operation' => 'evaluation'
    )
  )

  # Log evaluation start
  @langfuse.event(
    trace_id: evaluation_id,
    name: "evaluation_started",
    metadata: {
      dataset_size: payload[:dataset_size],
      metric_name: payload[:metric_name]
    }
  )
end
536
+
537
+ sig { params(event: T.untyped).void }
538
# Logs an "evaluation_completed" event against payload[:evaluation_id] and,
# when the payload carries an average score, records it as
# "evaluation_score".
def handle_evaluation_complete(event)
  return unless @langfuse

  payload = event.payload
  evaluation_id = payload[:evaluation_id]
  average_score = payload[:average_score]

  @langfuse.event(
    trace_id: evaluation_id,
    name: "evaluation_completed",
    metadata: {
      average_score: average_score,
      scores: payload[:scores],
      duration_ms: payload[:duration_ms]
    }
  )

  return unless average_score

  @langfuse.score(
    trace_id: evaluation_id,
    name: "evaluation_score",
    value: average_score,
    comment: "Average evaluation score"
  )
end
565
+
566
+ # Context event handlers
567
+ sig { params(event: T.untyped).void }
568
# Logs a "signature_deployment" event carrying the deployed signature's
# name, version and performance score.
def handle_deployment(event)
  client = @langfuse
  return unless client

  payload = event.payload
  details = {
    signature_name: payload[:signature_name],
    version: payload[:version],
    performance_score: payload[:performance_score]
  }

  client.event(name: "signature_deployment", metadata: details)
end
582
+
583
+ sig { params(event: T.untyped).void }
584
# Logs an "auto_deployment" event with the signature name and version,
# tagged with a fixed 'automatic' trigger.
def handle_auto_deployment(event)
  client = @langfuse
  return unless client

  payload = event.payload
  details = {
    signature_name: payload[:signature_name],
    version: payload[:version],
    trigger: 'automatic'
  }

  client.event(name: "auto_deployment", metadata: details)
end
598
+
599
+ sig { params(event: T.untyped).void }
600
# Logs an "automatic_rollback" event with the signature name, the current
# and previous scores, and the reported performance drop.
def handle_automatic_rollback(event)
  client = @langfuse
  return unless client

  payload = event.payload
  details = {
    signature_name: payload[:signature_name],
    current_score: payload[:current_score],
    previous_score: payload[:previous_score],
    performance_drop: payload[:performance_drop]
  }

  client.event(name: "automatic_rollback", metadata: details)
end
615
+
616
+ # Helper methods
617
+ sig { params(payload: T.untyped).returns(T.nilable(T::Hash[Symbol, T.untyped])) }
618
# Builds the usage hash for a generation from the payload's token counts.
# Returns nil when none of :tokens_total, :tokens_input or :tokens_output is
# present; otherwise returns the counts that are present under :input,
# :output and :total, plus unit: 'TOKENS'.
def build_usage_info(payload)
  counts = {
    input: payload[:tokens_input],
    output: payload[:tokens_output],
    total: payload[:tokens_total]
  }
  return nil unless counts[:total] || counts[:input] || counts[:output]

  # Keep only the counts that were actually supplied.
  counts.select { |_field, count| count }.merge(unit: 'TOKENS')
end
629
+
630
+ public
631
+
632
+ # Public API for manual tracing
633
+ sig { returns(T.nilable(T.untyped)) }
634
# Exposes the underlying Langfuse client for manual tracing.
# Returns nil when no client was created.
def langfuse_client
  @langfuse
end
637
+
638
+ sig { params(name: String, metadata: T::Hash[Symbol, T.untyped]).returns(T.nilable(T.untyped)) }
639
# Creates a trace with the given name and metadata, tagged with the
# configured default tags. Returns the client's trace result, or nil when
# no client exists.
def create_trace(name, metadata: {})
  client = @langfuse
  return nil unless client

  client.trace(name: name, metadata: metadata, tags: @config.default_tags)
end
648
+
649
+ sig { params(trace_id: String, name: String, value: Float, comment: T.nilable(String)).void }
650
# Records a named score against the given trace, with an optional comment.
# Does nothing when no client exists.
def add_score(trace_id, name, value, comment: nil)
  client = @langfuse
  return unless client

  client.score(trace_id: trace_id, name: name, value: value, comment: comment)
end
660
+
661
+ sig { void }
662
# Asks the Langfuse client to flush. Does nothing when no client exists.
def flush
  client = @langfuse
  client.flush if client
end
667
+ end
668
+ end
669
+ end