looped 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/PLAN.md +856 -0
- data/README.md +340 -0
- data/docs/self-improving-coding-agent.md +374 -0
- data/exe/looped +115 -0
- data/lib/looped/agent.rb +188 -0
- data/lib/looped/application.rb +252 -0
- data/lib/looped/judge.rb +90 -0
- data/lib/looped/memory.rb +96 -0
- data/lib/looped/optimizer.rb +267 -0
- data/lib/looped/signatures.rb +40 -0
- data/lib/looped/state.rb +120 -0
- data/lib/looped/tools/read_file.rb +35 -0
- data/lib/looped/tools/run_command.rb +56 -0
- data/lib/looped/tools/search_code.rb +38 -0
- data/lib/looped/tools/write_file.rb +37 -0
- data/lib/looped/types.rb +53 -0
- data/lib/looped/version.rb +6 -0
- data/lib/looped.rb +100 -0
- data/looped.gemspec +47 -0
- metadata +246 -0
|
# typed: strict
# frozen_string_literal: true

require 'async'
require 'readline'

module Looped
  # Interactive terminal front-end for the looped agent.
  #
  # Wires together three collaborators:
  #   * State     - persistent storage for instructions and the training buffer
  #   * Agent     - executes coding tasks and returns judged results
  #   * Optimizer - background loop that runs GEPA over accumulated results
  #
  # #run starts the optimizer as a background Async task and then drives a
  # blocking Readline REPL until the user quits.
  class Application
    extend T::Sig

    sig { returns(Agent) }
    attr_reader :agent

    sig { returns(Optimizer) }
    attr_reader :optimizer

    sig { returns(State) }
    attr_reader :state

    sig do
      params(
        model: T.nilable(String),
        judge_model: T.nilable(String),
        reflection_model: T.nilable(String),
        max_iterations: Integer,
        optimizer_batch_size: Integer,
        optimizer_interval: Integer
      ).void
    end
    def initialize(
      model: nil,
      judge_model: nil,
      reflection_model: nil,
      max_iterations: 10,
      optimizer_batch_size: 5,
      optimizer_interval: 60
    )
      @state = T.let(State.new, State)
      @agent = T.let(
        Agent.new(
          model: model,
          max_iterations: max_iterations,
          judge_model: judge_model
        ),
        Agent
      )
      @optimizer = T.let(
        Optimizer.new(
          state: @state,
          batch_size: optimizer_batch_size,
          check_interval: optimizer_interval,
          reflection_model: reflection_model
        ),
        Optimizer
      )
      @running = T.let(false, T::Boolean)
      # FIX: @current_context was read in #execute_task but never declared.
      # Under `# typed: strict` Sorbet rejects undeclared instance variables,
      # and plain Ruby warns on the uninitialized read.
      @current_context = T.let(nil, T.nilable(String))
    end

    # Starts the background optimizer and the interactive REPL. Blocks until
    # the user quits; always tears the optimizer down on the way out.
    sig { void }
    def run
      @running = true

      puts banner
      puts "Type a coding task and press Enter. Type 'quit' to exit."
      puts "Type 'status' to see optimization status."
      puts ""

      Async do |task|
        # Start the optimizer in the background
        optimizer_task = task.async do
          @optimizer.start
        end

        # Run the interactive loop; ensure the optimizer stops even if the
        # loop raises.
        begin
          interactive_loop
        ensure
          @optimizer.stop
          optimizer_task.stop
        end
      end
    end

    # Programmatic entry point: run a single task without the REPL.
    sig { params(task: String, context: String).returns(Types::TrainingResult) }
    def run_task(task:, context: '')
      @agent.run(task: task, context: context)
    end

    # Signals both the REPL loop and the optimizer to stop.
    sig { void }
    def stop
      @running = false
      @optimizer.stop
    end

    private

    # Blocking Readline loop. Exits on EOF (nil from readline) or 'quit'.
    sig { void }
    def interactive_loop
      while @running
        input = Readline.readline('looped> ', true)

        if input.nil? || input.strip.downcase == 'quit'
          puts "\nGoodbye!"
          @running = false
          break
        end

        next if input.strip.empty?

        handle_input(input.strip)
      end
    end

    # Dispatches a stripped input line to a built-in command or, by default,
    # treats it as a coding task for the agent.
    sig { params(input: String).void }
    def handle_input(input)
      case input.downcase
      when 'status'
        show_status
      when 'history'
        show_history
      when 'help'
        show_help
      when /^context\s+(.+)/i
        set_context(T.must($1))
      else
        execute_task(input)
      end
    end

    # Prints current instruction generation/score and training-buffer size.
    sig { void }
    def show_status
      instructions = @state.load_instructions
      buffer = @state.peek_training_buffer

      puts ""
      puts "=== Looped Status ==="
      puts ""

      if instructions
        puts "Current Instructions:"
        puts "  Generation: #{instructions.generation}"
        puts "  Best Score: #{instructions.score.round(2)}/10"
        puts "  Updated: #{instructions.updated_at}"
      else
        puts "No optimized instructions yet (using defaults)"
      end

      puts ""
      puts "Training Buffer: #{buffer.length} results"
      puts "Optimizer: #{@optimizer.running ? 'Running' : 'Stopped'}"
      puts ""
    end

    # Prints the five most recent judged tasks from the training buffer.
    sig { void }
    def show_history
      buffer = @state.peek_training_buffer

      if buffer.empty?
        puts "\nNo tasks completed yet.\n"
        return
      end

      puts ""
      puts "=== Recent Tasks ==="
      puts ""

      buffer.last(5).each_with_index do |result, i|
        puts "#{i + 1}. #{result.task.truncate(60)}"
        puts "   Score: #{result.score.round(2)}/10 | #{result.timestamp}"
        puts ""
      end
    end

    # Prints the command reference and recognized environment variables.
    sig { void }
    def show_help
      puts <<~HELP

        === Looped Commands ===

        <task>      Execute a coding task
        status      Show optimization status
        history     Show recent task history
        help        Show this help message
        quit        Exit the application

        === Environment Variables ===

        LOOPED_MODEL              Agent model (default: gpt-4o-mini)
        LOOPED_JUDGE_MODEL        Judge model (default: gpt-4o-mini)
        LOOPED_REFLECTION_MODEL   GEPA reflection model (default: gpt-4o-mini)

      HELP
    end

    # Stores a context string that is passed with every subsequent task.
    sig { params(context: String).void }
    def set_context(context)
      @current_context = context
      puts "\nContext set to: #{context}\n"
    end

    # Runs one task through the agent and prints score/solution/feedback.
    # Errors are reported, not re-raised, so the REPL keeps running.
    sig { params(task: String).void }
    def execute_task(task)
      puts "\nExecuting task..."
      puts ""

      context = @current_context || ''

      begin
        result = @agent.run(task: task, context: context)

        puts "=== Result ==="
        puts ""
        puts "Score: #{result.score.round(2)}/10"
        puts ""
        puts "Solution:"
        puts result.solution
        puts ""
        puts "Feedback:"
        puts result.feedback
        puts ""
      rescue StandardError => e
        puts "Error: #{e.message}"
        puts e.backtrace&.first(5)&.join("\n")
        puts ""
      end
    end

    # ASCII-art startup banner.
    sig { returns(String) }
    def banner
      <<~BANNER

        ╦  ┌─┐┌─┐┌─┐┌─┐┌┬┐
        ║  │ ││ │├─┘├┤  ││
        ╩═╝└─┘└─┘┴  └─┘─┴┘

        Self-improving coding agent powered by DSPy.rb + GEPA

      BANNER
    end
  end
end
|
243
|
+
# Add String#truncate for display
|
|
244
|
+
unless String.method_defined?(:truncate)
|
|
245
|
+
class String
|
|
246
|
+
def truncate(length, omission: '...')
|
|
247
|
+
return self if self.length <= length
|
|
248
|
+
|
|
249
|
+
"#{self[0, length - omission.length]}#{omission}"
|
|
250
|
+
end
|
|
251
|
+
end
|
|
252
|
+
end
|
data/lib/looped/judge.rb
ADDED
|
# typed: strict
# frozen_string_literal: true

module Looped
  # LLM-as-judge: scores an agent's solution for a task on a 0..10 scale via
  # a DSPy predictor, and can render the judgment as a feedback string.
  class Judge
    extend T::Sig

    DEFAULT_MODEL = 'openai/gpt-4o-mini'

    sig { returns(DSPy::Predict) }
    attr_reader :predictor

    # NOTE(review): DSPy.configure mutates the process-global DSPy LM, so
    # constructing a Judge reconfigures every other DSPy module in the
    # process. Confirm this is intended before creating multiple judges
    # with different models.
    sig { params(model: T.nilable(String), api_key: T.nilable(String)).void }
    def initialize(model: nil, api_key: nil)
      model_id = model || ENV.fetch('LOOPED_JUDGE_MODEL', DEFAULT_MODEL)
      resolved_api_key = api_key || resolve_api_key(model_id)

      # Configure DSPy with our LM
      DSPy.configure do |c|
        c.lm = DSPy::LM.new(model_id, api_key: resolved_api_key)
      end

      @predictor = T.let(DSPy::Predict.new(Looped::JudgeSignature), DSPy::Predict)
    end

    # Runs the judge predictor and normalizes its output into a
    # Types::Judgment. When no expected behavior is supplied, a generic one
    # is inferred from the task text.
    sig { params(task: String, solution: String, expected_behavior: String).returns(Types::Judgment) }
    def evaluate(task:, solution:, expected_behavior: '')
      result = predictor.call(
        task: task,
        solution: solution,
        expected_behavior: expected_behavior.empty? ? infer_expected_behavior(task) : expected_behavior
      )

      Types::Judgment.new(
        score: normalize_score(result.score),
        passed: result.passed,
        critique: result.critique,
        suggestions: result.suggestions || []
      )
    end

    # Formats a judgment as a human-readable multi-line feedback string:
    # score, pass/fail status, critique, and numbered suggestions (if any).
    sig { params(judgment: Types::Judgment).returns(String) }
    def to_feedback(judgment)
      parts = []
      parts << "Score: #{judgment.score}/10"
      parts << "Status: #{judgment.passed ? 'PASSED' : 'FAILED'}"
      parts << ""
      parts << "Critique:"
      parts << judgment.critique

      if judgment.suggestions.any?
        parts << ""
        parts << "Suggestions for improvement:"
        judgment.suggestions.each_with_index do |suggestion, i|
          parts << "  #{i + 1}. #{suggestion}"
        end
      end

      parts.join("\n")
    end

    private

    # Maps a "provider/model" id's provider prefix to the matching API-key
    # environment variable; unknown providers fall back to OpenAI's key.
    sig { params(model_id: String).returns(T.nilable(String)) }
    def resolve_api_key(model_id)
      provider = model_id.split('/').first
      case provider
      when 'openai'
        ENV['OPENAI_API_KEY']
      when 'anthropic'
        ENV['ANTHROPIC_API_KEY']
      when 'gemini', 'google'
        ENV['GEMINI_API_KEY']
      else
        ENV['OPENAI_API_KEY'] # Default fallback
      end
    end

    # Fallback expectation used when the caller supplies none.
    sig { params(task: String).returns(String) }
    def infer_expected_behavior(task)
      "The solution should correctly and completely address the task: #{task}"
    end

    # Coerces an untyped model score to a Float clamped into 0.0..10.0.
    sig { params(score: T.untyped).returns(Float) }
    def normalize_score(score)
      # Comparable#clamp replaces the original [[v, 0.0].max, 10.0].min dance.
      score.to_f.clamp(0.0, 10.0)
    end
  end
end
|
|
# typed: strict
# frozen_string_literal: true

# FIX: Time#iso8601 is provided by the 'time' stdlib extension, not by Ruby
# core. Without this require, #add raises NoMethodError unless some other
# file happened to load 'time' first.
require 'time'

module Looped
  # Bounded, in-process log of the agent's recent actions. Oldest entries are
  # evicted once max_entries is exceeded; action outputs are truncated to
  # max_result_length for display/prompting.
  class Memory
    extend T::Sig

    DEFAULT_MAX_ENTRIES = 10
    DEFAULT_MAX_RESULT_LENGTH = 500

    sig { params(max_entries: Integer, max_result_length: Integer).void }
    def initialize(max_entries: DEFAULT_MAX_ENTRIES, max_result_length: DEFAULT_MAX_RESULT_LENGTH)
      @entries = T.let([], T::Array[Types::MemoryEntry])
      @max_entries = max_entries
      @max_result_length = max_result_length
    end

    # Records one action. Input keys are normalized to strings and the entry
    # is timestamped in UTC ISO-8601; oldest entries beyond the cap are
    # evicted.
    sig do
      params(
        action_type: String,
        action_input: T::Hash[T.any(String, Symbol), T.untyped],
        action_output: String,
        model_id: T.nilable(String),
        tokens_used: T.nilable(Integer),
        error: T.nilable(String)
      ).void
    end
    def add(action_type:, action_input:, action_output:, model_id: nil, tokens_used: nil, error: nil)
      @entries << Types::MemoryEntry.new(
        action_type: action_type,
        action_input: stringify_keys(action_input),
        action_output: action_output,
        timestamp: Time.now.utc.iso8601,
        model_id: model_id,
        error: error,
        tokens_used: tokens_used
      )

      # Enforce max_entries limit
      @entries.shift while @entries.size > @max_entries
    end

    # Compact summaries of the most recent entries, suitable for prompting.
    sig { returns(T::Array[Types::ActionSummary]) }
    def to_context
      @entries.last(@max_entries).map do |entry|
        Types::ActionSummary.new(
          action: summarize_action(entry),
          result: truncate(entry.action_output)
        )
      end
    end

    # Defensive copy so callers cannot mutate internal state.
    sig { returns(T::Array[Types::MemoryEntry]) }
    def entries
      @entries.dup
    end

    sig { returns(Integer) }
    def size
      @entries.size
    end

    sig { void }
    def clear
      @entries.clear
    end

    # The n most recent entries (fewer when the log is shorter than n).
    sig { params(n: Integer).returns(T::Array[Types::MemoryEntry]) }
    def recent(n)
      @entries.last(n)
    end

    private

    sig { params(hash: T::Hash[T.any(String, Symbol), T.untyped]).returns(T::Hash[String, T.untyped]) }
    def stringify_keys(hash)
      hash.transform_keys(&:to_s)
    end

    # Renders "action_type(k=v, ...)" with each value shortened when longer
    # than 50 characters.
    sig { params(entry: Types::MemoryEntry).returns(String) }
    def summarize_action(entry)
      input_parts = entry.action_input.map do |k, v|
        value_str = v.to_s
        value_str = "#{value_str[0..47]}..." if value_str.length > 50
        "#{k}=#{value_str}"
      end
      "#{entry.action_type}(#{input_parts.join(', ')})"
    end

    # Caps text at @max_result_length characters, appending '...' when cut.
    sig { params(text: String).returns(String) }
    def truncate(text)
      return text if text.length <= @max_result_length

      "#{text[0...@max_result_length]}..."
    end
  end
end
|
|
# typed: strict
# frozen_string_literal: true

require 'async'

module Looped
  # Background GEPA optimization loop. Periodically inspects the State's
  # training buffer and, once enough judged results have accumulated, runs a
  # GEPA compile pass over the agent's ReAct predictors and persists the
  # improved instructions.
  class Optimizer
    extend T::Sig

    DEFAULT_BATCH_SIZE = 5
    DEFAULT_MAX_METRIC_CALLS = 32
    DEFAULT_CHECK_INTERVAL = 60

    sig { returns(State) }
    attr_reader :state

    sig { returns(T::Boolean) }
    attr_reader :running

    sig do
      params(
        state: State,
        batch_size: Integer,
        max_metric_calls: Integer,
        check_interval: Integer,
        reflection_model: T.nilable(String)
      ).void
    end
    def initialize(
      state:,
      batch_size: DEFAULT_BATCH_SIZE,
      max_metric_calls: DEFAULT_MAX_METRIC_CALLS,
      check_interval: DEFAULT_CHECK_INTERVAL,
      reflection_model: nil
    )
      @state = state
      @batch_size = batch_size
      @max_metric_calls = max_metric_calls
      @check_interval = check_interval
      @reflection_model = T.let(
        reflection_model || ENV.fetch('LOOPED_REFLECTION_MODEL', 'openai/gpt-4o-mini'),
        String
      )
      @running = T.let(false, T::Boolean)
    end

    # Polls the training buffer every @check_interval seconds until #stop.
    # NOTE: #stop is only observed after the current sleep elapses, so
    # shutdown can lag by up to @check_interval seconds.
    sig { void }
    def start
      @running = true

      Async do |task|
        while @running
          check_and_optimize
          task.sleep(@check_interval)
        end
      end
    end

    # Requests the polling loop to exit after its current iteration.
    sig { void }
    def stop
      @running = false
    end

    # Runs one optimization pass when at least @batch_size results are queued.
    sig { void }
    def check_and_optimize
      buffer = @state.peek_training_buffer
      return if buffer.length < @batch_size

      optimize(buffer)
    end

    # Compiles the agent's ReAct program with GEPA over the given results and
    # saves the optimized instructions. The training buffer is consumed only
    # after a successful run, so a failure leaves the data for the next pass.
    sig { params(training_results: T::Array[Types::TrainingResult]).void }
    def optimize(training_results)
      # Build trainset from training results
      trainset = build_trainset(training_results)
      return if trainset.empty?

      # Load current instructions
      current_instructions = @state.load_instructions

      # Build the base agent for optimization
      agent = build_agent_for_optimization(current_instructions)

      # Configure GEPA metric
      metric = build_metric

      # Create reflection LM for GEPA
      reflection_lm = DSPy::ReflectionLM.new(@reflection_model, api_key: resolve_api_key(@reflection_model))

      # Build feedback map for multi-predictor optimization
      feedback_map = build_feedback_map

      # Run GEPA optimization
      gepa = DSPy::Teleprompt::GEPA.new(
        metric: metric,
        reflection_lm: reflection_lm,
        feedback_map: feedback_map,
        config: {
          max_metric_calls: @max_metric_calls,
          minibatch_size: [@batch_size, trainset.length].min,
          perfect_score: 10.0,
          skip_perfect_score: true,
          use_merge: trainset.length >= 4
        }
      )

      # 80/20 train/validation split.
      # FIX: with a single example the original computed
      # split_point = [(1 * 0.8).ceil, 0].min == 0 and handed GEPA an EMPTY
      # trainset; a lone example now serves as both train and validation.
      if trainset.length > 1
        split_point = [(trainset.length * 0.8).ceil, trainset.length - 1].min
        train_examples = trainset[0...split_point]
        val_examples = trainset[split_point..]
      else
        train_examples = trainset
        val_examples = trainset
      end

      result = gepa.compile(agent.react, trainset: train_examples, valset: val_examples)

      # Extract optimized instructions from the best program
      save_optimized_instructions(result, current_instructions)

      # Consume the training buffer after successful optimization
      @state.consume_training_buffer
    end

    private

    # Converts judged training results into DSPy examples. The judge's score
    # and feedback ride along in metadata so the metric and feedback
    # functions can replay them without re-judging.
    sig { params(training_results: T::Array[Types::TrainingResult]).returns(T::Array[DSPy::Example]) }
    def build_trainset(training_results)
      training_results.map do |result|
        DSPy::Example.new(
          signature_class: CodingTaskSignature,
          input: {
            task: result.task,
            context: '',
            history: []
          },
          expected: {
            solution: result.solution,
            files_modified: []
          },
          metadata: {
            score: result.score,
            feedback: result.feedback
          }
        )
      end
    end

    # Fresh agent whose instructions reflect the latest persisted state.
    sig { params(instructions: T.nilable(Types::Instructions)).returns(Agent) }
    def build_agent_for_optimization(instructions)
      agent = Agent.new

      # Apply existing instructions if present
      agent.reload_instructions if instructions

      agent
    end

    # Metric that replays the pre-computed judge score/feedback from example
    # metadata; the prediction argument is intentionally unused.
    sig { returns(T.proc.params(arg0: DSPy::Example, arg1: T.untyped).returns(T::Hash[Symbol, T.untyped])) }
    def build_metric
      lambda do |example, _prediction|
        metadata = example.metadata || {}
        score = metadata[:score] || 0.0
        feedback = metadata[:feedback] || ''

        # Return score + feedback for GEPA reflection
        { score: score, feedback: feedback }
      end
    end

    # Per-predictor feedback functions for ReAct's two predictors. Each
    # narrows the judge feedback to the lines relevant to that predictor.
    # (The two lambdas were previously copy-pasted; now built by feedback_fn.)
    sig { returns(T::Hash[String, T.untyped]) }
    def build_feedback_map
      {
        'thought_generator' => feedback_fn { |feedback| extract_thought_feedback(feedback) },
        'observation_processor' => feedback_fn { |feedback| extract_observation_feedback(feedback) }
      }
    end

    # Builds a GEPA feedback lambda that pulls score/feedback from the module
    # inputs' metadata and filters the feedback text through +filter+.
    sig { params(filter: T.proc.params(arg0: String).returns(String)).returns(T.untyped) }
    def feedback_fn(&filter)
      lambda do |predictor_output:, predictor_inputs:, module_inputs:, module_outputs:, captured_trace:|
        metadata = module_inputs.metadata || {}
        feedback = metadata[:feedback] || ''
        score = metadata[:score] || 0.0

        { score: score, feedback: filter.call(feedback) }
      end
    end

    # Keeps only feedback lines about reasoning/planning (plus generic
    # score/critique/suggestion lines); falls back to the full text when
    # nothing matches.
    sig { params(feedback: String).returns(String) }
    def extract_thought_feedback(feedback)
      lines = feedback.lines.select do |line|
        line.match?(/reason|think|plan|approach|logic|decision/i) ||
          line.match?(/score|critique|suggestion/i)
      end

      lines.empty? ? feedback : lines.join
    end

    # Keeps only feedback lines about observation interpretation (plus
    # generic score/critique/suggestion lines); falls back to the full text
    # when nothing matches.
    sig { params(feedback: String).returns(String) }
    def extract_observation_feedback(feedback)
      lines = feedback.lines.select do |line|
        line.match?(/observ|interpret|understand|result|output/i) ||
          line.match?(/score|critique|suggestion/i)
      end

      lines.empty? ? feedback : lines.join
    end

    # Pulls the per-predictor instructions out of GEPA's best program and
    # persists them with an incremented generation counter.
    sig { params(result: T.untyped, current: T.nilable(Types::Instructions)).void }
    def save_optimized_instructions(result, current)
      optimized_program = result.optimized_program

      # Extract instructions from the optimized predictors
      thought_instruction = extract_predictor_instruction(optimized_program, 'thought_generator')
      observation_instruction = extract_predictor_instruction(optimized_program, 'observation_processor')

      # Calculate new generation number
      generation = current ? current.generation + 1 : 1
      best_score = result.best_score_value || 0.0

      # Save to state
      @state.save_instructions(
        instructions: {
          thought_generator: thought_instruction,
          observation_processor: observation_instruction
        },
        score: best_score,
        generation: generation
      )
    end

    # Finds the named predictor in +program+ and returns its instruction.
    # Probes both predictor.prompt.instruction and predictor.instruction
    # (duck-typed); returns nil when the predictor or instruction is absent.
    sig { params(program: DSPy::Module, predictor_name: String).returns(T.nilable(String)) }
    def extract_predictor_instruction(program, predictor_name)
      predictors = program.named_predictors
      predictor = predictors.find { |name, _| name == predictor_name }&.last

      return nil unless predictor

      if predictor.respond_to?(:prompt) && predictor.prompt.respond_to?(:instruction)
        predictor.prompt.instruction
      elsif predictor.respond_to?(:instruction)
        predictor.instruction
      end
    end

    # Maps a "provider/model" id's provider prefix to the matching API-key
    # environment variable; unknown providers fall back to OpenAI's key.
    sig { params(model_id: String).returns(T.nilable(String)) }
    def resolve_api_key(model_id)
      provider = model_id.split('/').first
      case provider
      when 'openai' then ENV['OPENAI_API_KEY']
      when 'anthropic' then ENV['ANTHROPIC_API_KEY']
      when 'gemini', 'google' then ENV['GEMINI_API_KEY']
      else ENV['OPENAI_API_KEY']
      end
    end
  end
end