dspy 0.21.0 → 0.22.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dspy/events/subscriber_mixin.rb +79 -0
- data/lib/dspy/events/subscribers.rb +43 -0
- data/lib/dspy/events/types.rb +218 -0
- data/lib/dspy/events.rb +83 -0
- data/lib/dspy/mixins/type_coercion.rb +21 -1
- data/lib/dspy/teleprompt/gepa.rb +637 -0
- data/lib/dspy/teleprompt/teleprompter.rb +1 -1
- data/lib/dspy/version.rb +1 -1
- data/lib/dspy.rb +93 -1
- metadata +9 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 35e148ea7f8b9d9239489008409167bce63fce8bbb51798837573a93cc82bd73
+  data.tar.gz: 69304272af26457e557189b743c59bcddb25f9d05ba485e5fec1e61cee5be4ad
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 998377fc4c8d444029e83e9b01f5e65efd28df06abc07a8b120258a91ef6894c6a0b75ffa398526c305894fe9b5a22eb389b0e9646a1f26d341af6aea736101b
+  data.tar.gz: 77e0ccd6a18fd3495bd785acfc0d45555f7632b895bb9dbe583e1b42622270f9fb9f9f242cac43f01259971829decb7e8306ad070a2fe9e2a4a9c655bbeb675b
data/lib/dspy/events/subscriber_mixin.rb
ADDED
@@ -0,0 +1,79 @@
# frozen_string_literal: true

require 'sorbet-runtime'

module DSPy
  module Events
    # Mixin for adding class-level event subscriptions
    # Provides a clean way to subscribe to events at the class level
    # instead of requiring instance-based subscriptions
    #
    # Usage:
    #   class MyTracker
    #     include DSPy::Events::SubscriberMixin
    #
    #     add_subscription('llm.*') do |name, attrs|
    #       # Handle LLM events globally for this class
    #     end
    #   end
    module SubscriberMixin
      extend T::Sig

      def self.included(base)
        base.extend(ClassMethods)
        base.class_eval do
          @event_subscriptions = []
          @subscription_mutex = Mutex.new

          # Initialize subscriptions when the class is first loaded
          @subscriptions_initialized = false
        end
      end

      module ClassMethods
        extend T::Sig

        # Add a class-level event subscription
        sig { params(pattern: String, block: T.proc.params(arg0: String, arg1: T::Hash[T.any(String, Symbol), T.untyped]).void).returns(String) }
        def add_subscription(pattern, &block)
          subscription_mutex.synchronize do
            subscription_id = DSPy.events.subscribe(pattern, &block)
            event_subscriptions << subscription_id
            subscription_id
          end
        end

        # Remove all subscriptions for this class
        sig { void }
        def unsubscribe_all
          subscription_mutex.synchronize do
            event_subscriptions.each { |id| DSPy.events.unsubscribe(id) }
            event_subscriptions.clear
          end
        end

        # Get list of active subscription IDs
        sig { returns(T::Array[String]) }
        def subscriptions
          subscription_mutex.synchronize do
            event_subscriptions.dup
          end
        end

        private

        # Thread-safe access to subscriptions array
        sig { returns(T::Array[String]) }
        def event_subscriptions
          @event_subscriptions ||= []
        end

        # Thread-safe access to mutex
        sig { returns(Mutex) }
        def subscription_mutex
          @subscription_mutex ||= Mutex.new
        end
      end
    end
  end
end
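A minimal consumer sketch for the mixin above. The TokenTracker class, event attribute keys, and the aggregation logic are illustrative, not part of the gem; only SubscriberMixin, add_subscription, and unsubscribe_all come from the diff.

    require 'dspy'

    # Hypothetical class-level tracker: every 'llm.*' event in the process is tallied here.
    class TokenTracker
      include DSPy::Events::SubscriberMixin

      @totals = Hash.new(0)

      class << self
        attr_reader :totals
      end

      add_subscription('llm.*') do |_name, attrs|
        model = attrs['gen_ai.request.model'] || 'unknown'
        @totals[model] += (attrs['gen_ai.usage.total_tokens'] || 0).to_i
      end
    end

    # Later, remove every subscription registered by this class:
    # TokenTracker.unsubscribe_all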
data/lib/dspy/events/subscribers.rb
ADDED
@@ -0,0 +1,43 @@
# frozen_string_literal: true

module DSPy
  module Events
    # Base subscriber class for event-driven patterns
    # This provides the foundation for creating custom event subscribers
    #
    # Example usage:
    #   class MySubscriber < DSPy::Events::BaseSubscriber
    #     def subscribe
    #       add_subscription('llm.*') do |event_name, attributes|
    #         # Handle LLM events
    #       end
    #     end
    #   end
    #
    #   subscriber = MySubscriber.new
    #   # subscriber will start receiving events
    #   subscriber.unsubscribe  # Clean up when done
    class BaseSubscriber
      def initialize
        @subscriptions = []
      end

      def subscribe
        raise NotImplementedError, "Subclasses must implement #subscribe"
      end

      def unsubscribe
        @subscriptions.each { |id| DSPy.events.unsubscribe(id) }
        @subscriptions.clear
      end

      protected

      def add_subscription(pattern, &block)
        subscription_id = DSPy.events.subscribe(pattern, &block)
        @subscriptions << subscription_id
        subscription_id
      end
    end
  end
end
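An instance-based subscriber built on BaseSubscriber might look like the sketch below. The ErrorLogger class and the '*.error' pattern are illustrative; note that #subscribe is called explicitly, since initialize only prepares the subscription list.

    require 'dspy'

    class ErrorLogger < DSPy::Events::BaseSubscriber
      def subscribe
        add_subscription('*.error') do |event_name, attributes|
          DSPy.logger&.warn("#{event_name}: #{attributes.inspect}")
        end
      end
    end

    logger = ErrorLogger.new
    logger.subscribe     # registers the pattern with DSPy.events
    # ... run DSPy programs ...
    logger.unsubscribe   # removes all of this instance's subscriptions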
data/lib/dspy/events/types.rb
ADDED
@@ -0,0 +1,218 @@
# frozen_string_literal: true

require 'sorbet-runtime'

module DSPy
  module Events
    # Base event structure using Sorbet T::Struct
    class Event < T::Struct
      const :name, String
      const :timestamp, Time
      const :attributes, T::Hash[T.any(String, Symbol), T.untyped], default: {}

      def initialize(name:, timestamp: Time.now, attributes: {})
        super(name: name, timestamp: timestamp, attributes: attributes)
      end

      def to_attributes
        result = attributes.dup
        result[:timestamp] = timestamp
        result
      end
    end

    # Token usage structure for LLM events
    class TokenUsage < T::Struct
      const :prompt_tokens, Integer
      const :completion_tokens, Integer

      def total_tokens
        prompt_tokens + completion_tokens
      end
    end

    # LLM operation events with semantic conventions
    class LLMEvent < T::Struct
      VALID_PROVIDERS = T.let(
        ['openai', 'anthropic', 'google', 'azure', 'ollama', 'together', 'groq', 'cohere'].freeze,
        T::Array[String]
      )

      # Common event fields
      const :name, String
      const :timestamp, Time

      # LLM-specific fields
      const :provider, String
      const :model, String
      const :usage, T.nilable(TokenUsage), default: nil
      const :duration_ms, T.nilable(Numeric), default: nil
      const :temperature, T.nilable(Float), default: nil
      const :max_tokens, T.nilable(Integer), default: nil
      const :stream, T.nilable(T::Boolean), default: nil

      def initialize(name:, provider:, model:, timestamp: Time.now, usage: nil, duration_ms: nil, temperature: nil, max_tokens: nil, stream: nil)
        unless VALID_PROVIDERS.include?(provider.downcase)
          raise ArgumentError, "Invalid provider '#{provider}'. Must be one of: #{VALID_PROVIDERS.join(', ')}"
        end
        super(
          name: name,
          timestamp: timestamp,
          provider: provider.downcase,
          model: model,
          usage: usage,
          duration_ms: duration_ms,
          temperature: temperature,
          max_tokens: max_tokens,
          stream: stream
        )
      end

      def to_otel_attributes
        attrs = {
          'gen_ai.system' => provider,
          'gen_ai.request.model' => model
        }

        if usage
          attrs['gen_ai.usage.prompt_tokens'] = usage.prompt_tokens
          attrs['gen_ai.usage.completion_tokens'] = usage.completion_tokens
          attrs['gen_ai.usage.total_tokens'] = usage.total_tokens
        end

        attrs['gen_ai.request.temperature'] = temperature if temperature
        attrs['gen_ai.request.max_tokens'] = max_tokens if max_tokens
        attrs['gen_ai.request.stream'] = stream if stream
        attrs['duration_ms'] = duration_ms if duration_ms

        attrs
      end

      def to_attributes
        result = to_otel_attributes.dup
        result[:timestamp] = timestamp
        result[:provider] = provider
        result[:model] = model
        result[:duration_ms] = duration_ms if duration_ms
        result
      end
    end

    # DSPy module execution events
    class ModuleEvent < T::Struct
      # Common event fields
      const :name, String
      const :timestamp, Time

      # Module-specific fields
      const :module_name, String
      const :signature_name, T.nilable(String), default: nil
      const :input_fields, T.nilable(T::Array[String]), default: nil
      const :output_fields, T.nilable(T::Array[String]), default: nil
      const :duration_ms, T.nilable(Numeric), default: nil
      const :success, T.nilable(T::Boolean), default: nil

      def initialize(name:, module_name:, timestamp: Time.now, signature_name: nil, input_fields: nil, output_fields: nil, duration_ms: nil, success: nil)
        super(
          name: name,
          timestamp: timestamp,
          module_name: module_name,
          signature_name: signature_name,
          input_fields: input_fields,
          output_fields: output_fields,
          duration_ms: duration_ms,
          success: success
        )
      end

      def to_attributes
        result = { timestamp: timestamp }
        result[:module_name] = module_name
        result[:signature_name] = signature_name if signature_name
        result[:input_fields] = input_fields if input_fields
        result[:output_fields] = output_fields if output_fields
        result[:duration_ms] = duration_ms if duration_ms
        result[:success] = success if success
        result
      end
    end

    # Optimization and training events
    class OptimizationEvent < T::Struct
      # Common event fields
      const :name, String
      const :timestamp, Time

      # Optimization-specific fields
      const :optimizer_name, String
      const :trial_number, T.nilable(Integer), default: nil
      const :score, T.nilable(Float), default: nil
      const :best_score, T.nilable(Float), default: nil
      const :parameters, T.nilable(T::Hash[T.any(String, Symbol), T.untyped]), default: nil
      const :duration_ms, T.nilable(Numeric), default: nil

      def initialize(name:, optimizer_name:, timestamp: Time.now, trial_number: nil, score: nil, best_score: nil, parameters: nil, duration_ms: nil)
        super(
          name: name,
          timestamp: timestamp,
          optimizer_name: optimizer_name,
          trial_number: trial_number,
          score: score,
          best_score: best_score,
          parameters: parameters,
          duration_ms: duration_ms
        )
      end

      def to_attributes
        result = { timestamp: timestamp }
        result[:optimizer_name] = optimizer_name
        result[:trial_number] = trial_number if trial_number
        result[:score] = score if score
        result[:best_score] = best_score if best_score
        result[:parameters] = parameters if parameters
        result[:duration_ms] = duration_ms if duration_ms
        result
      end
    end

    # Evaluation events
    class EvaluationEvent < T::Struct
      # Common event fields
      const :name, String
      const :timestamp, Time

      # Evaluation-specific fields
      const :evaluator_name, String
      const :metric_name, T.nilable(String), default: nil
      const :score, T.nilable(Float), default: nil
      const :total_examples, T.nilable(Integer), default: nil
      const :passed_examples, T.nilable(Integer), default: nil
      const :duration_ms, T.nilable(Numeric), default: nil

      def initialize(name:, evaluator_name:, timestamp: Time.now, metric_name: nil, score: nil, total_examples: nil, passed_examples: nil, duration_ms: nil)
        super(
          name: name,
          timestamp: timestamp,
          evaluator_name: evaluator_name,
          metric_name: metric_name,
          score: score,
          total_examples: total_examples,
          passed_examples: passed_examples,
          duration_ms: duration_ms
        )
      end

      def to_attributes
        result = { timestamp: timestamp }
        result[:evaluator_name] = evaluator_name
        result[:metric_name] = metric_name if metric_name
        result[:score] = score if score
        result[:total_examples] = total_examples if total_examples
        result[:passed_examples] = passed_examples if passed_examples
        result[:duration_ms] = duration_ms if duration_ms
        result
      end
    end
  end
end
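A short sketch of how the typed structs above map onto OpenTelemetry GenAI attributes. The event name and the numeric values are illustrative; the attribute keys are the ones produced by to_otel_attributes in the file above.

    require 'dspy'

    usage = DSPy::Events::TokenUsage.new(prompt_tokens: 120, completion_tokens: 45)

    event = DSPy::Events::LLMEvent.new(
      name: 'llm.generate',        # illustrative event name
      provider: 'openai',          # must be one of VALID_PROVIDERS
      model: 'gpt-4o-mini',
      usage: usage,
      duration_ms: 830,
      temperature: 0.2
    )

    event.to_otel_attributes
    # => { 'gen_ai.system' => 'openai',
    #      'gen_ai.request.model' => 'gpt-4o-mini',
    #      'gen_ai.usage.prompt_tokens' => 120,
    #      'gen_ai.usage.completion_tokens' => 45,
    #      'gen_ai.usage.total_tokens' => 165,
    #      'gen_ai.request.temperature' => 0.2,
    #      'duration_ms' => 830 }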
data/lib/dspy/events.rb
ADDED
@@ -0,0 +1,83 @@
# frozen_string_literal: true

require 'securerandom'

module DSPy
  # Events module to hold typed event structures
  module Events
    # Will be defined in events/types.rb
  end

  class EventRegistry
    def initialize
      @listeners = {}
      @subscription_counter = 0
      @mutex = Mutex.new
    end

    def subscribe(pattern, &block)
      return unless block_given?

      subscription_id = SecureRandom.uuid
      @mutex.synchronize do
        @listeners[subscription_id] = {
          pattern: pattern,
          block: block
        }
      end

      subscription_id
    end

    def unsubscribe(subscription_id)
      @mutex.synchronize do
        @listeners.delete(subscription_id)
      end
    end

    def clear_listeners
      @mutex.synchronize do
        @listeners.clear
      end
    end

    def notify(event_name, attributes)
      # Take a snapshot of current listeners to avoid holding the mutex during execution
      # This allows listeners to be modified while others are executing
      matching_listeners = @mutex.synchronize do
        @listeners.select do |id, listener|
          pattern_matches?(listener[:pattern], event_name)
        end.dup # Create a copy to avoid shared state
      end

      matching_listeners.each do |id, listener|
        begin
          listener[:block].call(event_name, attributes)
        rescue => e
          # Log the error but continue processing other listeners
          # Use emit_log directly to avoid infinite recursion
          DSPy.send(:emit_log, 'event.listener.error', {
            subscription_id: id,
            error_class: e.class.name,
            error_message: e.message,
            event_name: event_name
          })
        end
      end
    end

    private

    def pattern_matches?(pattern, event_name)
      if pattern.include?('*')
        # Convert wildcard pattern to regex
        # llm.* becomes ^llm\..*$
        regex_pattern = "^#{Regexp.escape(pattern).gsub('\\*', '.*')}$"
        Regexp.new(regex_pattern).match?(event_name)
      else
        # Exact match
        pattern == event_name
      end
    end
  end
end
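The registry's wildcard matching is a straightforward regex translation of the pattern. A sketch of using it directly (in practice the process-wide DSPy.events instance is used instead of constructing a registry by hand; the event names below are illustrative):

    require 'dspy'

    registry = DSPy::EventRegistry.new

    id = registry.subscribe('llm.*') do |name, attrs|
      puts "#{name}: #{attrs[:model]}"
    end

    registry.notify('llm.response', { model: 'gpt-4o-mini' })  # matches 'llm.*', block runs
    registry.notify('module.forward', {})                      # no match, block not called

    registry.unsubscribe(id)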
data/lib/dspy/mixins/type_coercion.rb
CHANGED
@@ -140,8 +140,28 @@ module DSPy
       # Convert string keys to symbols
       symbolized_hash = value.transform_keys(&:to_sym)

+      # Get struct properties to understand what fields are expected
+      struct_props = struct_class.props
+
+      # Remove the _type field that DSPy adds for discriminating structs,
+      # but only if it's NOT a legitimate field in the struct definition
+      if !struct_props.key?(:_type) && symbolized_hash.key?(:_type)
+        symbolized_hash = symbolized_hash.except(:_type)
+      end
+
+      # Recursively coerce nested struct fields
+      coerced_hash = symbolized_hash.map do |key, val|
+        prop_info = struct_props[key]
+        if prop_info && prop_info[:type]
+          coerced_value = coerce_value_to_type(val, prop_info[:type])
+          [key, coerced_value]
+        else
+          [key, val]
+        end
+      end.to_h
+
       # Create the struct instance
-      struct_class.new(**
+      struct_class.new(**coerced_hash)
     rescue ArgumentError => e
       # If struct creation fails, return the original value
       DSPy.logger.debug("Failed to coerce to struct #{struct_class}: #{e.message}")
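The added block recursively coerces each hash value into its declared prop type before the struct is instantiated, so nested struct fields no longer arrive as raw hashes. The enclosing method name is not visible in this hunk, so the call at the bottom of the sketch is against a hypothetical helper; the structs themselves are illustrative.

    require 'sorbet-runtime'

    class Address < T::Struct
      const :city, String
    end

    class Person < T::Struct
      const :name, String
      const :address, Address
    end

    # With the recursive coercion, the inner hash is first converted to an
    # Address via coerce_value_to_type before Person.new is called:
    #
    #   coerce_to_struct({ 'name' => 'Ada', 'address' => { 'city' => 'London' } }, Person)
    #   # hypothetical call; returns a Person whose #address is an Address, not a Hash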
data/lib/dspy/teleprompt/gepa.rb
ADDED
@@ -0,0 +1,637 @@
# frozen_string_literal: true

require 'sorbet-runtime'
require_relative 'teleprompter'

module DSPy
  module Teleprompt
    # GEPA: Genetic-Pareto Reflective Prompt Evolution optimizer
    # Uses natural language reflection to evolve prompts through genetic algorithms
    # and Pareto frontier selection for maintaining diverse high-performing candidates
    class GEPA < Teleprompter
      extend T::Sig

      # Immutable execution trace record using Ruby's Data class
      # Captures execution events for GEPA's reflective analysis
      class ExecutionTrace < Data.define(
        :trace_id,
        :event_name,
        :timestamp,
        :span_id,
        :attributes,
        :metadata
      )
        extend T::Sig

        # Type aliases for better type safety
        AttributesHash = T.type_alias { T::Hash[T.any(String, Symbol), T.untyped] }
        MetadataHash = T.type_alias { T::Hash[Symbol, T.untyped] }

        sig do
          params(
            trace_id: String,
            event_name: String,
            timestamp: Time,
            span_id: T.nilable(String),
            attributes: AttributesHash,
            metadata: T.nilable(MetadataHash)
          ).void
        end
        def initialize(trace_id:, event_name:, timestamp:, span_id: nil, attributes: {}, metadata: nil)
          # Freeze nested structures for true immutability
          frozen_attributes = attributes.freeze
          frozen_metadata = metadata&.freeze

          super(
            trace_id: trace_id,
            event_name: event_name,
            timestamp: timestamp,
            span_id: span_id,
            attributes: frozen_attributes,
            metadata: frozen_metadata
          )
        end

        # Check if this is an LLM-related trace
        sig { returns(T::Boolean) }
        def llm_trace?
          event_name.start_with?('llm.') || event_name.start_with?('lm.')
        end

        # Check if this is a module-related trace
        sig { returns(T::Boolean) }
        def module_trace?
          !llm_trace? && (
            event_name.include?('chain_of_thought') ||
            event_name.include?('react') ||
            event_name.include?('codeact') ||
            event_name.include?('predict')
          )
        end

        # Extract token usage from LLM traces
        sig { returns(Integer) }
        def token_usage
          return 0 unless llm_trace?

          # Try different token attribute keys
          [
            'gen_ai.usage.total_tokens',
            'gen_ai.usage.prompt_tokens',
            'tokens',
            :tokens
          ].each do |key|
            value = attributes[key]
            return value.to_i if value
          end

          0
        end

        # Convert to hash representation
        sig { returns(T::Hash[Symbol, T.untyped]) }
        def to_h
          {
            trace_id: trace_id,
            event_name: event_name,
            timestamp: timestamp,
            span_id: span_id,
            attributes: attributes,
            metadata: metadata
          }
        end

        # Extract prompt text from trace
        sig { returns(T.nilable(String)) }
        def prompt_text
          attributes[:prompt] || attributes['prompt']
        end

        # Extract response text from trace
        sig { returns(T.nilable(String)) }
        def response_text
          attributes[:response] || attributes['response']
        end

        # Get the model used in this trace
        sig { returns(T.nilable(String)) }
        def model_name
          attributes['gen_ai.request.model'] || attributes[:model]
        end

        # Get the signature class name
        sig { returns(T.nilable(String)) }
        def signature_name
          attributes['dspy.signature'] || attributes[:signature]
        end
      end

      # Immutable reflection analysis result using Ruby's Data class
      # Stores the output of GEPA's reflective analysis on execution traces
      class ReflectionResult < Data.define(
        :trace_id,
        :diagnosis,
        :improvements,
        :confidence,
        :reasoning,
        :suggested_mutations,
        :metadata
      )
        extend T::Sig

        # Type aliases for better type safety
        ImprovementsList = T.type_alias { T::Array[String] }
        MutationsList = T.type_alias { T::Array[Symbol] }
        MetadataHash = T.type_alias { T::Hash[Symbol, T.untyped] }

        sig do
          params(
            trace_id: String,
            diagnosis: String,
            improvements: ImprovementsList,
            confidence: Float,
            reasoning: String,
            suggested_mutations: MutationsList,
            metadata: MetadataHash
          ).void
        end
        def initialize(trace_id:, diagnosis:, improvements:, confidence:, reasoning:, suggested_mutations:, metadata:)
          # Validate confidence score
          if confidence < 0.0 || confidence > 1.0
            raise ArgumentError, "confidence must be between 0 and 1, got #{confidence}"
          end

          # Freeze nested structures for true immutability
          frozen_improvements = improvements.freeze
          frozen_mutations = suggested_mutations.freeze
          frozen_metadata = metadata.freeze

          super(
            trace_id: trace_id,
            diagnosis: diagnosis,
            improvements: frozen_improvements,
            confidence: confidence,
            reasoning: reasoning,
            suggested_mutations: frozen_mutations,
            metadata: frozen_metadata
          )
        end

        # Check if this reflection has high confidence (>= 0.8)
        sig { returns(T::Boolean) }
        def high_confidence?
          confidence >= 0.8
        end

        # Check if this reflection suggests actionable changes
        sig { returns(T::Boolean) }
        def actionable?
          improvements.any? || suggested_mutations.any?
        end

        # Get mutations sorted by priority (simple alphabetical for Phase 1)
        sig { returns(MutationsList) }
        def mutation_priority
          suggested_mutations.sort
        end

        # Convert to hash representation
        sig { returns(T::Hash[Symbol, T.untyped]) }
        def to_h
          {
            trace_id: trace_id,
            diagnosis: diagnosis,
            improvements: improvements,
            confidence: confidence,
            reasoning: reasoning,
            suggested_mutations: suggested_mutations,
            metadata: metadata
          }
        end

        # Generate a concise summary of this reflection
        sig { returns(String) }
        def summary
          confidence_pct = (confidence * 100).round
          mutation_list = suggested_mutations.map(&:to_s).join(', ')

          "#{diagnosis.split('.').first}. " \
            "Confidence: #{confidence_pct}%. " \
            "#{improvements.size} improvements suggested. " \
            "Mutations: #{mutation_list}."
        end

        # Check if reflection model was used
        sig { returns(T.nilable(String)) }
        def reflection_model
          metadata[:reflection_model]
        end

        # Get token usage from reflection analysis
        sig { returns(Integer) }
        def token_usage
          metadata[:token_usage] || 0
        end

        # Get analysis duration in milliseconds
        sig { returns(Integer) }
        def analysis_duration_ms
          metadata[:analysis_duration_ms] || 0
        end
      end

      # TraceCollector aggregates execution traces from DSPy events
      # Uses SubscriberMixin for class-level event subscriptions
      class TraceCollector
        include DSPy::Events::SubscriberMixin
        extend T::Sig

        sig { void }
        def initialize
          @traces = T.let([], T::Array[ExecutionTrace])
          @traces_mutex = T.let(Mutex.new, Mutex)
          setup_subscriptions
        end

        sig { returns(T::Array[ExecutionTrace]) }
        attr_reader :traces

        # Get count of collected traces
        sig { returns(Integer) }
        def collected_count
          @traces_mutex.synchronize { @traces.size }
        end

        # Collect trace from event data
        sig { params(event_name: String, event_data: T::Hash[T.any(String, Symbol), T.untyped]).void }
        def collect_trace(event_name, event_data)
          @traces_mutex.synchronize do
            trace_id = event_data['trace_id'] || event_data[:trace_id] || generate_trace_id

            # Avoid duplicates
            return if @traces.any? { |t| t.trace_id == trace_id }

            timestamp = event_data['timestamp'] || event_data[:timestamp] || Time.now
            span_id = event_data['span_id'] || event_data[:span_id]
            attributes = event_data['attributes'] || event_data[:attributes] || {}
            metadata = event_data['metadata'] || event_data[:metadata] || {}

            trace = ExecutionTrace.new(
              trace_id: trace_id,
              event_name: event_name,
              timestamp: timestamp,
              span_id: span_id,
              attributes: attributes,
              metadata: metadata
            )

            @traces << trace
          end
        end

        # Get traces for a specific optimization run
        sig { params(run_id: String).returns(T::Array[ExecutionTrace]) }
        def traces_for_run(run_id)
          @traces_mutex.synchronize do
            @traces.select do |trace|
              metadata = trace.metadata
              metadata && metadata[:optimization_run_id] == run_id
            end
          end
        end

        # Get only LLM traces
        sig { returns(T::Array[ExecutionTrace]) }
        def llm_traces
          @traces_mutex.synchronize { @traces.select(&:llm_trace?) }
        end

        # Get only module traces
        sig { returns(T::Array[ExecutionTrace]) }
        def module_traces
          @traces_mutex.synchronize { @traces.select(&:module_trace?) }
        end

        # Clear all collected traces
        sig { void }
        def clear
          @traces_mutex.synchronize { @traces.clear }
        end

        private

        # Set up event subscriptions using SubscriberMixin
        sig { void }
        def setup_subscriptions
          # Subscribe to LLM events
          self.class.add_subscription('llm.*') do |name, attrs|
            collect_trace(name, attrs)
          end

          # Subscribe to module events
          self.class.add_subscription('*.reasoning_complete') do |name, attrs|
            collect_trace(name, attrs)
          end

          self.class.add_subscription('*.predict_complete') do |name, attrs|
            collect_trace(name, attrs)
          end
        end

        # Generate unique trace ID
        sig { returns(String) }
        def generate_trace_id
          "gepa-trace-#{SecureRandom.hex(4)}"
        end
      end

      # ReflectionEngine performs natural language reflection on execution traces
      # This is the core component that analyzes traces and generates improvement insights
      class ReflectionEngine
        extend T::Sig

        sig { returns(GEPAConfig) }
        attr_reader :config

        sig { params(config: T.nilable(GEPAConfig)).void }
        def initialize(config = nil)
          @config = config || GEPAConfig.new
        end

        # Perform reflective analysis on execution traces
        sig { params(traces: T::Array[ExecutionTrace]).returns(ReflectionResult) }
        def reflect_on_traces(traces)
          reflection_id = generate_reflection_id

          if traces.empty?
            return ReflectionResult.new(
              trace_id: reflection_id,
              diagnosis: 'No traces available for analysis',
              improvements: [],
              confidence: 0.0,
              reasoning: 'Cannot provide reflection without execution traces',
              suggested_mutations: [],
              metadata: {
                reflection_model: @config.reflection_lm,
                analysis_timestamp: Time.now,
                trace_count: 0
              }
            )
          end

          patterns = analyze_execution_patterns(traces)
          improvements = generate_improvement_suggestions(patterns)
          mutations = suggest_mutations(patterns)

          # For Phase 1, we generate a simple rule-based analysis
          # Future phases will use LLM-based reflection
          diagnosis = generate_diagnosis(patterns)
          reasoning = generate_reasoning(patterns, traces)
          confidence = calculate_confidence(patterns)

          ReflectionResult.new(
            trace_id: reflection_id,
            diagnosis: diagnosis,
            improvements: improvements,
            confidence: confidence,
            reasoning: reasoning,
            suggested_mutations: mutations,
            metadata: {
              reflection_model: @config.reflection_lm,
              analysis_timestamp: Time.now,
              trace_count: traces.size,
              token_usage: 0 # Phase 1 doesn't use actual LLM reflection
            }
          )
        end

        # Analyze patterns in execution traces
        sig { params(traces: T::Array[ExecutionTrace]).returns(T::Hash[Symbol, T.untyped]) }
        def analyze_execution_patterns(traces)
          llm_traces = traces.select(&:llm_trace?)
          module_traces = traces.select(&:module_trace?)

          total_tokens = llm_traces.sum(&:token_usage)
          unique_models = llm_traces.map(&:model_name).compact.uniq

          {
            llm_traces_count: llm_traces.size,
            module_traces_count: module_traces.size,
            total_tokens: total_tokens,
            unique_models: unique_models,
            avg_response_length: calculate_avg_response_length(llm_traces),
            trace_timespan: calculate_timespan(traces)
          }
        end

        # Generate improvement suggestions based on patterns
        sig { params(patterns: T::Hash[Symbol, T.untyped]).returns(T::Array[String]) }
        def generate_improvement_suggestions(patterns)
          suggestions = []

          if patterns[:total_tokens] > 500
            suggestions << 'Consider reducing prompt length to lower token usage'
          end

          if patterns[:avg_response_length] < 10
            suggestions << 'Responses seem brief - consider asking for more detailed explanations'
          end

          if patterns[:llm_traces_count] > patterns[:module_traces_count] * 3
            suggestions << 'High LLM usage detected - consider optimizing reasoning chains'
          end

          if patterns[:unique_models].size > 1
            suggestions << 'Multiple models used - consider standardizing on one model for consistency'
          end

          suggestions << 'Add step-by-step reasoning instructions' if suggestions.empty?
          suggestions
        end

        # Suggest mutation operations based on patterns
        sig { params(patterns: T::Hash[Symbol, T.untyped]).returns(T::Array[Symbol]) }
        def suggest_mutations(patterns)
          mutations = []

          avg_length = patterns[:avg_response_length] || 0
          total_tokens = patterns[:total_tokens] || 0
          llm_count = patterns[:llm_traces_count] || 0

          mutations << :expand if avg_length < 15
          mutations << :simplify if total_tokens > 300
          mutations << :combine if llm_count > 2
          mutations << :rewrite if llm_count == 1
          mutations << :rephrase if mutations.empty?

          mutations.uniq
        end

        private

        # Generate unique reflection ID
        sig { returns(String) }
        def generate_reflection_id
          "reflection-#{SecureRandom.hex(4)}"
        end

        # Generate diagnosis text
        sig { params(patterns: T::Hash[Symbol, T.untyped]).returns(String) }
        def generate_diagnosis(patterns)
          if patterns[:total_tokens] > 400
            'High token usage indicates potential inefficiency in prompt design'
          elsif patterns[:llm_traces_count] == 0
            'No LLM interactions found - execution may not be working as expected'
          elsif patterns[:avg_response_length] < 10
            'Responses are unusually brief which may indicate prompt clarity issues'
          else
            'Execution patterns appear normal with room for optimization'
          end
        end

        # Generate reasoning text
        sig { params(patterns: T::Hash[Symbol, T.untyped], traces: T::Array[ExecutionTrace]).returns(String) }
        def generate_reasoning(patterns, traces)
          reasoning_parts = []

          reasoning_parts << "Analyzed #{traces.size} execution traces"
          reasoning_parts << "#{patterns[:llm_traces_count]} LLM interactions"
          reasoning_parts << "#{patterns[:module_traces_count]} module operations"
          reasoning_parts << "Total token usage: #{patterns[:total_tokens]}"

          reasoning_parts.join('. ') + '.'
        end

        # Calculate confidence based on patterns
        sig { params(patterns: T::Hash[Symbol, T.untyped]).returns(Float) }
        def calculate_confidence(patterns)
          base_confidence = 0.7

          # More traces = higher confidence
          trace_bonus = [patterns[:llm_traces_count] + patterns[:module_traces_count], 10].min * 0.02

          # Reasonable token usage = higher confidence
          token_penalty = patterns[:total_tokens] > 1000 ? -0.1 : 0.0

          [(base_confidence + trace_bonus + token_penalty), 1.0].min
        end

        # Calculate average response length from LLM traces
        sig { params(llm_traces: T::Array[ExecutionTrace]).returns(Integer) }
        def calculate_avg_response_length(llm_traces)
          return 0 if llm_traces.empty?

          total_length = llm_traces.sum do |trace|
            response = trace.response_text
            response ? response.length : 0
          end

          total_length / llm_traces.size
        end

        # Calculate timespan of traces
        sig { params(traces: T::Array[ExecutionTrace]).returns(Float) }
        def calculate_timespan(traces)
          return 0.0 if traces.size < 2

          timestamps = traces.map(&:timestamp).sort
          (timestamps.last - timestamps.first).to_f
        end
      end

      # Configuration for GEPA optimization
      class GEPAConfig < Config
        extend T::Sig

        sig { returns(String) }
        attr_accessor :reflection_lm

        sig { returns(Integer) }
        attr_accessor :num_generations

        sig { returns(Integer) }
        attr_accessor :population_size

        sig { returns(Float) }
        attr_accessor :mutation_rate

        sig { returns(T::Boolean) }
        attr_accessor :use_pareto_selection

        sig { void }
        def initialize
          super
          @reflection_lm = 'gpt-4o'
          @num_generations = 10
          @population_size = 8
          @mutation_rate = 0.7
          @use_pareto_selection = true
        end

        sig { returns(T::Hash[Symbol, T.untyped]) }
        def to_h
          super.merge({
            reflection_lm: @reflection_lm,
            num_generations: @num_generations,
            population_size: @population_size,
            mutation_rate: @mutation_rate,
            use_pareto_selection: @use_pareto_selection
          })
        end
      end

      sig { returns(GEPAConfig) }
      attr_reader :config

      sig do
        params(
          metric: T.nilable(T.proc.params(arg0: T.untyped, arg1: T.untyped).returns(T.untyped)),
          config: T.nilable(GEPAConfig)
        ).void
      end
      def initialize(metric: nil, config: nil)
        @config = config || GEPAConfig.new
        super(metric: metric, config: @config)
      end

      # Main optimization method
      sig do
        params(
          program: T.untyped,
          trainset: T::Array[T.untyped],
          valset: T.nilable(T::Array[T.untyped])
        ).returns(OptimizationResult)
      end
      def compile(program, trainset:, valset: nil)
        validate_inputs(program, trainset, valset)

        instrument_step('gepa_compile', {
          trainset_size: trainset.size,
          valset_size: valset&.size || 0,
          num_generations: @config.num_generations,
          population_size: @config.population_size
        }) do
          # For Phase 1, return a basic optimization result
          # Future phases will implement the full genetic algorithm

          OptimizationResult.new(
            optimized_program: program,
            scores: { gepa_score: 0.0 },
            history: {
              num_generations: @config.num_generations,
              population_size: @config.population_size,
              phase: 'Phase 1 - Basic Structure'
            },
            best_score_name: 'gepa_score',
            best_score_value: 0.0,
            metadata: {
              optimizer: 'GEPA',
              reflection_lm: @config.reflection_lm,
              implementation_status: 'Phase 1 - Infrastructure Complete'
            }
          )
        end
      end
    end
  end
end
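A sketch of driving the Phase 1 GEPA pieces directly. The program, trainset, and metric are placeholders, and compile currently returns the Phase 1 stub result noted in the code above, so the compile call is shown commented out.

    require 'dspy'

    config = DSPy::Teleprompt::GEPA::GEPAConfig.new
    config.num_generations = 5
    config.population_size = 4

    collector = DSPy::Teleprompt::GEPA::TraceCollector.new  # subscribes to llm.* and *_complete events
    # ... run DSPy programs here so traces accumulate ...

    engine = DSPy::Teleprompt::GEPA::ReflectionEngine.new(config)
    result = engine.reflect_on_traces(collector.traces)
    puts result.summary          # diagnosis, confidence, suggested mutations

    gepa = DSPy::Teleprompt::GEPA.new(metric: ->(_example, _prediction) { 0.0 }, config: config)
    # optimization = gepa.compile(program, trainset: trainset, valset: valset)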
data/lib/dspy/teleprompt/teleprompter.rb
CHANGED
@@ -316,7 +316,7 @@ module DSPy
        operation: "optimization.#{step_name}",
        'dspy.module' => 'Teleprompter',
        'teleprompter.class' => self.class.name,
-       'teleprompter.config' => @config.to_h,
+       'teleprompter.config' => @config.to_h.to_json,
        **payload
      ) do
        yield
data/lib/dspy/version.rb
CHANGED
data/lib/dspy.rb
CHANGED
@@ -9,6 +9,8 @@ require_relative 'dspy/errors'
 require_relative 'dspy/type_serializer'
 require_relative 'dspy/observability'
 require_relative 'dspy/context'
+require_relative 'dspy/events'
+require_relative 'dspy/events/types'

 module DSPy
   extend Dry::Configurable
@@ -34,18 +36,105 @@ module DSPy
   end

   def self.log(event, **attributes)
+    # Return nil early if logger is not configured (backward compatibility)
+    return nil unless logger
+
+    # Forward to event system - this maintains backward compatibility
+    # while providing all new event system benefits
+    event(event, attributes)
+
+    # Return nil to maintain backward compatibility
+    nil
+  end
+
+  def self.event(event_name_or_object, attributes = {})
+    # Handle typed event objects
+    if event_name_or_object.respond_to?(:name) && event_name_or_object.respond_to?(:to_attributes)
+      event_obj = event_name_or_object
+      event_name = event_obj.name
+      attributes = event_obj.to_attributes
+
+      # For LLM events, use OpenTelemetry semantic conventions for spans
+      if event_obj.is_a?(DSPy::Events::LLMEvent)
+        otel_attributes = event_obj.to_otel_attributes
+        create_event_span(event_name, otel_attributes)
+      else
+        create_event_span(event_name, attributes)
+      end
+    else
+      # Handle string event names (backward compatibility)
+      event_name = event_name_or_object
+      raise ArgumentError, "Event name cannot be nil" if event_name.nil?
+
+      # Handle nil attributes
+      attributes = {} if attributes.nil?
+
+      # Create OpenTelemetry span for the event if observability is enabled
+      create_event_span(event_name, attributes)
+    end
+
+    # Perform the actual logging (original DSPy.log behavior)
+    emit_log(event_name, attributes)
+
+    # Notify event listeners
+    events.notify(event_name, attributes)
+  end
+
+  def self.events
+    @event_registry ||= DSPy::EventRegistry.new
+  end
+
+  private
+
+  def self.emit_log(event_name, attributes)
     return unless logger

     # Merge context automatically (but don't include span_stack)
     context = Context.current.dup
     context.delete(:span_stack)
     attributes = context.merge(attributes)
-    attributes[:event] =
+    attributes[:event] = event_name

     # Use Dry::Logger's structured logging
     logger.info(attributes)
   end

+  def self.create_event_span(event_name, attributes)
+    return unless DSPy::Observability.enabled?
+
+    begin
+      # Flatten nested hashes for OpenTelemetry span attributes
+      flattened_attributes = flatten_attributes(attributes)
+
+      # Create and immediately finish a span for this event
+      # Events are instant moments in time, not ongoing operations
+      span = DSPy::Observability.start_span(event_name, flattened_attributes)
+      DSPy::Observability.finish_span(span) if span
+    rescue => e
+      # Log error but don't let it break the event system
+      # Use emit_log directly to avoid infinite recursion
+      emit_log('event.span_creation_error', {
+        error_class: e.class.name,
+        error_message: e.message,
+        event_name: event_name
+      })
+    end
+  end
+
+  def self.flatten_attributes(attributes, parent_key = '', result = {})
+    attributes.each do |key, value|
+      new_key = parent_key.empty? ? key.to_s : "#{parent_key}.#{key}"
+
+      if value.is_a?(Hash)
+        flatten_attributes(value, new_key, result)
+      else
+        result[new_key] = value
+      end
+    end
+
+    result
+  end
+
   def self.create_logger
     env = ENV['RACK_ENV'] || ENV['RAILS_ENV'] || 'development'
     log_output = ENV['DSPY_LOG'] # Allow override
@@ -101,6 +190,8 @@ require_relative 'dspy/image'
 require_relative 'dspy/strategy'
 require_relative 'dspy/prediction'
 require_relative 'dspy/predict'
+require_relative 'dspy/events/subscribers'
+require_relative 'dspy/events/subscriber_mixin'
 require_relative 'dspy/chain_of_thought'
 require_relative 'dspy/re_act'
 require_relative 'dspy/code_act'
@@ -111,6 +202,7 @@ require_relative 'dspy/teleprompt/data_handler'
 require_relative 'dspy/propose/grounded_proposer'
 require_relative 'dspy/teleprompt/simple_optimizer'
 require_relative 'dspy/teleprompt/mipro_v2'
+require_relative 'dspy/teleprompt/gepa'
 require_relative 'dspy/tools'
 require_relative 'dspy/memory'
 require_relative 'dspy/storage/program_storage'
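A sketch of the new public surface added to the top-level DSPy module. The event names and attribute values are illustrative; DSPy.event, DSPy.events.subscribe, and DSPy.events.unsubscribe are the methods introduced in the hunk above.

    require 'dspy'

    # String events keep working the way DSPy.log did before:
    DSPy.event('optimization.trial_complete', { trial: 3, score: 0.82 })

    # Typed events route through OpenTelemetry GenAI semantic conventions:
    DSPy.event(DSPy::Events::LLMEvent.new(
      name: 'llm.generate',            # illustrative event name
      provider: 'anthropic',
      model: 'claude-3-5-sonnet'       # illustrative model id
    ))

    # Listeners receive every event matching their pattern:
    subscription = DSPy.events.subscribe('optimization.*') do |name, attrs|
      puts "#{name} -> #{attrs[:score]}"
    end
    DSPy.events.unsubscribe(subscription)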
metadata
CHANGED
@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: dspy
 version: !ruby/object:Gem::Version
-  version: 0.21.0
+  version: 0.22.1
 platform: ruby
 authors:
 - Vicente Reig Rincón de Arellano
 bindir: bin
 cert_chain: []
-date: 2025-09-
+date: 2025-09-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dry-configurable
@@ -177,7 +177,8 @@ dependencies:
   - - "~>"
     - !ruby/object:Gem::Version
       version: '0.30'
-description: The Ruby framework for programming with large language models.
+description: The Ruby framework for programming with large language models. Includes
+  event-driven observability system with OpenTelemetry integration and Langfuse export.
 email:
 - hey@vicente.services
 executables: []
@@ -192,6 +193,10 @@ files:
 - lib/dspy/error_formatter.rb
 - lib/dspy/errors.rb
 - lib/dspy/evaluate.rb
+- lib/dspy/events.rb
+- lib/dspy/events/subscriber_mixin.rb
+- lib/dspy/events/subscribers.rb
+- lib/dspy/events/types.rb
 - lib/dspy/example.rb
 - lib/dspy/few_shot_example.rb
 - lib/dspy/field.rb
@@ -244,6 +249,7 @@ files:
 - lib/dspy/storage/storage_manager.rb
 - lib/dspy/strategy.rb
 - lib/dspy/teleprompt/data_handler.rb
+- lib/dspy/teleprompt/gepa.rb
 - lib/dspy/teleprompt/mipro_v2.rb
 - lib/dspy/teleprompt/simple_optimizer.rb
 - lib/dspy/teleprompt/teleprompter.rb