dspy 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/lib/dspy/code_act.rb +463 -0
- data/lib/dspy/instrumentation.rb +15 -0
- data/lib/dspy/lm/adapters/anthropic_adapter.rb +106 -0
- data/lib/dspy/lm.rb +21 -7
- data/lib/dspy/memory/embedding_engine.rb +68 -0
- data/lib/dspy/memory/in_memory_store.rb +216 -0
- data/lib/dspy/memory/local_embedding_engine.rb +241 -0
- data/lib/dspy/memory/memory_compactor.rb +299 -0
- data/lib/dspy/memory/memory_manager.rb +248 -0
- data/lib/dspy/memory/memory_record.rb +163 -0
- data/lib/dspy/memory/memory_store.rb +90 -0
- data/lib/dspy/memory.rb +30 -0
- data/lib/dspy/mixins/instrumentation_helpers.rb +3 -5
- data/lib/dspy/mixins/type_coercion.rb +3 -0
- data/lib/dspy/prompt.rb +48 -1
- data/lib/dspy/subscribers/logger_subscriber.rb +91 -1
- data/lib/dspy/tools/base.rb +1 -1
- data/lib/dspy/tools/memory_toolset.rb +117 -0
- data/lib/dspy/tools/text_processing_toolset.rb +186 -0
- data/lib/dspy/tools/toolset.rb +223 -0
- data/lib/dspy/tools.rb +1 -0
- data/lib/dspy/version.rb +1 -1
- data/lib/dspy.rb +2 -0
- metadata +28 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e82dded62b8c11ca1ac69c75d0cdffd04e9e58daa5dcb16e66d794dbc37ee49e
|
4
|
+
data.tar.gz: 3d58da2a9ff3d76b9e0d5960e9e6df3dc0ac120ef97c99c640f05129d9990a77
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e25cecdaaf81a6985a1e6cf5bd7a709efc568c89cb687a8e9bd3e593c6e41e352ffe07798275bfca44bf0f61f9b7cc8483c57978b8c56099bf0739f00573f587
|
7
|
+
data.tar.gz: c465fb9231b2596b155ff839047b4b1e7a92ad2e40f98f7faabbc5eaa2b632fbd566c8d3e3c0005e0196f46ca3a0b4927fc9f64aa7bd6db262b2ae0c823e4aad
|
data/README.md
CHANGED
@@ -15,6 +15,7 @@ The result? LLM applications that actually scale and don't break when you sneeze
|
|
15
15
|
- **Predict** - Basic LLM completion with structured data
|
16
16
|
- **Chain of Thought** - Step-by-step reasoning for complex problems
|
17
17
|
- **ReAct** - Tool-using agents with basic tool integration
|
18
|
+
- **CodeAct** - Dynamic code execution agents for programming tasks
|
18
19
|
- **Manual Composition** - Combine multiple LLM calls into workflows
|
19
20
|
|
20
21
|
**Optimization & Evaluation:**
|
@@ -0,0 +1,463 @@
|
|
1
|
+
# typed: strict
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'sorbet-runtime'
|
5
|
+
require_relative 'predict'
|
6
|
+
require_relative 'signature'
|
7
|
+
require 'json'
|
8
|
+
require 'stringio'
|
9
|
+
require_relative 'instrumentation'
|
10
|
+
require_relative 'mixins/struct_builder'
|
11
|
+
require_relative 'mixins/instrumentation_helpers'
|
12
|
+
|
13
|
+
module DSPy
|
14
|
+
# Define a simple struct for CodeAct history entries with proper type annotations
|
15
|
+
# One recorded step of a CodeAct run: the step number, the model's reasoning,
# the Ruby code it produced, and the outcome of executing that code.
class CodeActHistoryEntry < T::Struct
  const :step, Integer
  prop :thought, T.nilable(String)
  prop :ruby_code, T.nilable(String)
  prop :execution_result, T.nilable(String)
  prop :error_message, String

  # Serializes the entry to a symbol-keyed Hash, leaving out every field
  # whose value is nil.
  def to_h
    %i[step thought ruby_code execution_result error_message].each_with_object({}) do |field, serialized|
      value = public_send(field)
      serialized[field] = value unless value.nil?
    end
  end
end
|
33
|
+
|
34
|
+
# Defines the signature for Ruby code generation
|
35
|
+
# Signature for the "think + code" step: given the task, a textual context and
# the history of earlier steps, the model returns its reasoning, the Ruby code
# to run, and a short explanation of that code.
class RubyCodeGeneration < DSPy::Signature
  description "Generate Ruby code to solve the given task."

  # Fields supplied to the model.
  input do
    const :task, String, description: "The task description requiring Ruby code solution"
    const :context, String, description: "Available variables and previous results from code execution history"
    const :history, T::Array[CodeActHistoryEntry], description: "Previous thoughts and code executions with their results. Use this to understand what has been tried and what variables are available."
  end

  # Fields the model is expected to produce.
  output do
    const :thought, String, description: "Reasoning about the approach to solve the task with Ruby code"
    const :ruby_code, String, description: "Ruby code to execute. This should be valid Ruby code that can be evaluated safely. Avoid system calls, file operations, or other potentially dangerous operations."
    const :explanation, String, description: "Brief explanation of what the code does and why this approach was chosen"
  end
end
|
56
|
+
|
57
|
+
# Decision produced by the observation step after each code execution.
class CodeActNextStep < T::Enum
  enums do
    # Keep iterating: more code is needed.
    Continue = new("continue")
    # The task is complete; the loop should stop.
    Finish = new("finish")
  end
end
|
63
|
+
|
64
|
+
# Defines the signature for processing code execution results
|
65
|
+
# Signature for the "observe" step: given the task, the history and the outcome
# of the latest execution, the model analyses the result and decides whether to
# continue or finish (optionally providing the final answer).
class RubyCodeObservation < DSPy::Signature
  description "Process the result of Ruby code execution and decide what to do next."

  input do
    const :task, String,
      description: "The original task"
    const :history, T::Array[CodeActHistoryEntry],
      description: "Previous thoughts, code executions, and their results"
    const :execution_result, T.nilable(String),
      description: "The result from executing the Ruby code"
    const :error_message, String,
      description: "Error message if the code execution failed (empty string if no error)"
  end

  output do
    const :observation, String,
      description: "Analysis of the execution result and what it means for solving the task"
    # Interpolate the serialized enum values ("continue" / "finish"). Interpolating
    # the enum instance itself would call T::Enum#to_s, which yields the inspect
    # form ("#<...::Continue>") rather than the literal the model must emit.
    const :next_step, CodeActNextStep,
      description: "What to do next: '#{CodeActNextStep::Continue.serialize}' to continue with more code or '#{CodeActNextStep::Finish.serialize}' if the task is complete"
    const :final_answer, T.nilable(String),
      description: "If next_step is 'finish', provide the final answer to the task based on the execution results"
  end
end
|
88
|
+
|
89
|
+
# CodeAct Agent using Think-Code-Observe pattern
|
90
|
+
class CodeAct < Predict
|
91
|
+
extend T::Sig
|
92
|
+
include Mixins::StructBuilder
|
93
|
+
include Mixins::InstrumentationHelpers
|
94
|
+
|
95
|
+
sig { returns(T.class_of(DSPy::Signature)) }
|
96
|
+
attr_reader :original_signature_class
|
97
|
+
|
98
|
+
sig { returns(T.class_of(T::Struct)) }
|
99
|
+
attr_reader :enhanced_output_struct
|
100
|
+
|
101
|
+
sig { returns(Integer) }
|
102
|
+
attr_reader :max_iterations
|
103
|
+
|
104
|
+
sig { returns(T::Hash[Symbol, T.untyped]) }
|
105
|
+
attr_reader :execution_context
|
106
|
+
|
107
|
+
sig { params(signature_class: T.class_of(DSPy::Signature), max_iterations: Integer).void }
|
108
|
+
# Builds a CodeAct agent around +signature_class+.
#
# Stores the original signature and iteration limit, creates the two Predict
# instances used for code generation and observation, derives an output struct
# extended with CodeAct fields, and hands the parent constructor an anonymous
# signature class that pairs the original input struct with that output struct.
#
# @param signature_class [Class] the user-facing DSPy::Signature subclass
# @param max_iterations [Integer] upper bound on think-code-observe rounds
def initialize(signature_class, max_iterations: 10)
  @original_signature_class = signature_class
  @max_iterations = max_iterations
  @execution_context = T.let({}, T::Hash[Symbol, T.untyped])

  # Predict instances for the two internal signatures.
  @code_generator = T.let(DSPy::Predict.new(RubyCodeGeneration), DSPy::Predict)
  @observation_processor = T.let(DSPy::Predict.new(RubyCodeObservation), DSPy::Predict)

  # Output struct carrying the original fields plus the CodeAct fields.
  @enhanced_output_struct = create_enhanced_output_struct(signature_class)

  # Inside the Class.new block `self` is the new class, so this object's
  # instance variables are not reachable there; pass the struct via a local.
  output_struct_for_signature = @enhanced_output_struct

  wrapped_signature = Class.new(DSPy::Signature) do
    description signature_class.description

    # Same inputs as the original signature, enhanced outputs.
    @input_struct_class = signature_class.input_struct_class
    @output_struct_class = output_struct_for_signature

    class << self
      attr_reader :input_struct_class, :output_struct_class
    end
  end

  super(wrapped_signature)
end
|
142
|
+
|
143
|
+
sig { params(kwargs: T.untyped).returns(T.untyped).override }
|
144
|
+
# Runs the agent for one set of inputs.
#
# The whole run is wrapped in `instrument_prediction` under the 'dspy.codeact'
# event. The inputs are validated by instantiating the original signature's
# input struct; the first serialized input value is used as the task text
# (T.cast asserts it is a String). The think-code-observe loop is then executed
# and its result is packaged into the enhanced output struct.
#
# @param kwargs [Hash] keyword inputs matching the original signature
# @return the value returned by `instrument_prediction` for the block
#   NOTE(review): that helper is defined elsewhere; presumably it returns the
#   block's value (the enhanced output struct) — confirm.
def forward(**kwargs)
  # The previous version also assigned `config.lm || DSPy.config.lm` to a local
  # that was never read; that dead lookup has been dropped.
  instrument_prediction('dspy.codeact', @original_signature_class, kwargs, {
    max_iterations: @max_iterations
  }) do
    # Validate input and extract the task
    input_struct = @original_signature_class.input_struct_class.new(**kwargs)
    task = T.cast(input_struct.serialize.values.first, String)

    reasoning_result = execute_codeact_reasoning_loop(task)

    create_enhanced_result(kwargs, reasoning_result)
  end
end
|
164
|
+
|
165
|
+
private
|
166
|
+
|
167
|
+
# Executes the main CodeAct reasoning loop (Think-Code-Observe)
|
168
|
+
sig { params(task: String).returns(T::Hash[Symbol, T.untyped]) }
|
169
|
+
# Drives the think-code-observe loop until an iteration reports completion or
# `should_continue_iteration?` says to stop.
#
# @param task [String] the task text handed to every iteration
# @return [Hash] :history, :iterations, :final_answer (falls back to
#   `default_no_answer_message` when no answer was produced) and
#   :execution_context
def execute_codeact_reasoning_loop(task)
  history = T.let([], T::Array[CodeActHistoryEntry])
  final_answer = T.let(nil, T.nilable(String))
  iterations_count = 0
  context = ""

  while should_continue_iteration?(iterations_count, final_answer)
    iterations_count += 1
    outcome = execute_single_iteration(task, history, context, iterations_count)

    unless outcome[:should_finish]
      # Carry the updated history and context into the next round.
      history, context = outcome.values_at(:history, :context)
      next
    end

    final_answer = outcome[:final_answer]
    break
  end

  handle_max_iterations_if_needed(iterations_count, final_answer, history)

  {
    history: history,
    iterations: iterations_count,
    final_answer: final_answer || default_no_answer_message,
    execution_context: @execution_context
  }
end
|
200
|
+
|
201
|
+
# Executes a single iteration of the Think-Code-Observe loop
|
202
|
+
sig { params(task: String, history: T::Array[CodeActHistoryEntry], context: String, iteration: Integer).returns(T::Hash[Symbol, T.untyped]) }
|
203
|
+
# Runs one think-code-observe round inside a 'dspy.codeact.iteration'
# instrumentation block.
#
# @param task [String] the task text
# @param history [Array] entries from earlier rounds
# @param context [String] textual summary of earlier results
# @param iteration [Integer] 1-based round number
# @return [Hash] `{ should_finish: true, final_answer: ... }` when the
#   observation step decides to finish, otherwise the Hash produced by
#   `finalize_iteration`
def execute_single_iteration(task, history, context, iteration)
  Instrumentation.instrument('dspy.codeact.iteration', {
    iteration: iteration,
    max_iterations: @max_iterations,
    history_length: history.length
  }) do
    execution_state = execute_think_code_step(task, context, history, iteration)

    observation_decision = process_observation_and_decide_next_step(
      task, execution_state[:history], execution_state[:execution_result],
      execution_state[:error_message], iteration
    )

    # Produce the result as the block's value instead of using `return`.
    # A `return` here is a non-local exit that unwinds straight through
    # Instrumentation.instrument, so anything that method does after yielding
    # would be skipped for the finishing iteration.
    # NOTE(review): instrument is defined elsewhere; this relies on it
    # returning the block's value, as the non-finishing path already did.
    if observation_decision[:should_finish]
      { should_finish: true, final_answer: observation_decision[:final_answer] }
    else
      finalize_iteration(execution_state, iteration)
    end
  end
end
|
223
|
+
|
224
|
+
# Executes the Think-Code step: generates code and executes it
|
225
|
+
sig { params(task: String, context: String, history: T::Array[CodeActHistoryEntry], iteration: Integer).returns(T::Hash[Symbol, T.untyped]) }
|
226
|
+
# Think + code: asks the code generator for reasoning and Ruby code, executes
# the code, and appends a history entry describing the step.
#
# Note that +history+ is appended to in place.
#
# @return [Hash] :history, :thought, :ruby_code, :execution_result, :error_message
def execute_think_code_step(task, context, history, iteration)
  # The generator always receives a non-empty context string.
  prompt_context = context.empty? ? "No previous context available." : context
  generated = @code_generator.forward(task: task, context: prompt_context, history: history)
  thought = generated.thought
  code = generated.ruby_code

  execution_result, error_message = execute_ruby_code_with_instrumentation(code, iteration)

  history << create_history_entry(iteration, thought, code, execution_result, error_message)

  {
    history: history,
    thought: thought,
    ruby_code: code,
    execution_result: execution_result,
    error_message: error_message
  }
end
|
250
|
+
|
251
|
+
# Finalizes iteration by updating context and emitting events
|
252
|
+
sig { params(execution_state: T::Hash[Symbol, T.untyped], iteration: Integer).returns(T::Hash[Symbol, T.untyped]) }
|
253
|
+
# Wraps up a non-finishing iteration: rebuilds the textual context from the
# history, emits the iteration-complete event, and returns the state for the
# next round.
#
# @param execution_state [Hash] result of `execute_think_code_step`
# @param iteration [Integer] round number reported in the event
# @return [Hash] `{ should_finish: false, history: ..., context: ... }`
def finalize_iteration(execution_state, iteration)
  recorded_history = execution_state[:history]
  refreshed_context = build_context_from_history(recorded_history)

  emit_iteration_complete_event(
    iteration,
    *execution_state.values_at(:thought, :ruby_code, :execution_result, :error_message)
  )

  { should_finish: false, history: recorded_history, context: refreshed_context }
end
|
267
|
+
|
268
|
+
# Creates enhanced output struct with CodeAct-specific fields
|
269
|
+
sig { params(signature_class: T.class_of(DSPy::Signature)).returns(T.class_of(T::Struct)) }
|
270
|
+
# Builds the output struct class used for results by passing the signature's
# input and output props, plus three CodeAct fields (history, iterations,
# execution_context), to `build_enhanced_struct`.
#
# @param signature_class [Class] the original signature
# @return whatever `build_enhanced_struct` returns (declared as a T::Struct class)
def create_enhanced_output_struct(signature_class)
  signature_props = {
    input: signature_class.input_struct_class.props,
    output: signature_class.output_struct_class.props
  }

  # Each extra field maps to a [type, description] pair.
  codeact_fields = {
    history: [T::Array[T::Hash[Symbol, T.untyped]], "CodeAct execution history"],
    iterations: [Integer, "Number of iterations executed"],
    execution_context: [T::Hash[Symbol, T.untyped], "Variables and context from code execution"]
  }

  build_enhanced_struct(signature_props, codeact_fields)
end
|
283
|
+
|
284
|
+
# Creates enhanced result struct
|
285
|
+
sig { params(input_kwargs: T::Hash[Symbol, T.untyped], reasoning_result: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
|
286
|
+
# Assembles the final result object: the caller's inputs, the CodeAct
# bookkeeping fields, and the final answer stored under the first output field
# of the original signature.
#
# @param input_kwargs [Hash] the keyword inputs given to `forward`
# @param reasoning_result [Hash] result of `execute_codeact_reasoning_loop`
# @return an instance of the enhanced output struct
def create_enhanced_result(input_kwargs, reasoning_result)
  answer_field = @original_signature_class.output_struct_class.props.keys.first

  codeact_fields = {
    history: reasoning_result[:history].map(&:to_h),
    iterations: reasoning_result[:iterations],
    execution_context: reasoning_result[:execution_context]
  }

  attributes = input_kwargs.merge(codeact_fields)
  attributes[answer_field] = reasoning_result[:final_answer]

  @enhanced_output_struct.new(**attributes)
end
|
298
|
+
|
299
|
+
# Helper methods for CodeAct logic
|
300
|
+
sig { params(iterations_count: Integer, final_answer: T.nilable(String)).returns(T::Boolean) }
|
301
|
+
# Whether the loop should run another round: only while no final answer exists
# and the iteration limit (if one is set) has not been reached.
#
# @param iterations_count [Integer] rounds executed so far
# @param final_answer [String, nil] answer found so far, if any
# @return [Boolean]
def should_continue_iteration?(iterations_count, final_answer)
  return false unless final_answer.nil?
  return true if @max_iterations.nil?

  iterations_count < @max_iterations
end
|
304
|
+
|
305
|
+
sig { params(ruby_code: String, iteration: Integer).returns([T.nilable(String), String]) }
|
306
|
+
# Executes +ruby_code+ via `execute_ruby_code_safely`, wrapped in a
# 'dspy.codeact.code_execution' instrumentation block that records the
# iteration number and the code length.
#
# @return the value of the instrumented block
#   (declared as a [result, error_message] pair)
def execute_ruby_code_with_instrumentation(ruby_code, iteration)
  event_payload = { iteration: iteration, code_length: ruby_code.length }

  Instrumentation.instrument('dspy.codeact.code_execution', event_payload) do
    execute_ruby_code_safely(ruby_code)
  end
end
|
314
|
+
|
315
|
+
sig { params(step: Integer, thought: String, ruby_code: String, execution_result: T.nilable(String), error_message: String).returns(CodeActHistoryEntry) }
|
316
|
+
# Packs the data of one executed step into a CodeActHistoryEntry.
#
# @return [CodeActHistoryEntry]
def create_history_entry(step, thought, ruby_code, execution_result, error_message)
  attributes = {
    step: step,
    thought: thought,
    ruby_code: ruby_code,
    execution_result: execution_result,
    error_message: error_message
  }

  CodeActHistoryEntry.new(**attributes)
end
|
325
|
+
|
326
|
+
sig { params(task: String, history: T::Array[CodeActHistoryEntry], execution_result: T.nilable(String), error_message: String, iteration: Integer).returns(T::Hash[Symbol, T.untyped]) }
|
327
|
+
# Observe: sends the latest execution outcome to the observation processor and
# translates its decision into a Hash.
#
# When the processor chooses Finish, the final answer is the processor's own
# answer, else the execution result, else the literal "Task completed".
# The +iteration+ argument is accepted but not used here.
#
# @return [Hash] `{ should_finish: false }` or
#   `{ should_finish: true, final_answer: String }`
def process_observation_and_decide_next_step(task, history, execution_result, error_message, iteration)
  observation = @observation_processor.forward(
    task: task,
    history: history,
    execution_result: execution_result,
    error_message: error_message
  )

  if observation.next_step == CodeActNextStep::Finish
    answer = observation.final_answer || execution_result || "Task completed"
    { should_finish: true, final_answer: answer }
  else
    { should_finish: false }
  end
end
|
341
|
+
|
342
|
+
sig { params(history: T::Array[CodeActHistoryEntry]).returns(String) }
|
343
|
+
# Summarizes earlier results as text: one "Step N result: ..." line for every
# entry that has a non-empty execution result, joined with newlines.
#
# @param history [Array] entries responding to #step and #execution_result
# @return [String] the summary; empty when no entry has a result
def build_context_from_history(history)
  history
    .select { |entry| entry.execution_result && !entry.execution_result.empty? }
    .map { |entry| "Step #{entry.step} result: #{entry.execution_result}" }
    .join("\n")
end
|
354
|
+
|
355
|
+
sig { params(iteration: Integer, thought: String, ruby_code: String, execution_result: T.nilable(String), error_message: T.nilable(String)).void }
|
356
|
+
# Emits the 'dspy.codeact.iteration_complete' event describing one finished
# iteration.
#
# @param iteration [Integer] round number
# @param thought [String] the model's reasoning for the step
# @param ruby_code [String] the code that was executed
# @param execution_result [String, nil] captured output / value, if any
# @param error_message [String, nil] error text; nil or "" means no error
def emit_iteration_complete_event(iteration, thought, ruby_code, execution_result, error_message)
  Instrumentation.emit('dspy.codeact.iteration_complete', {
    iteration: iteration,
    thought: thought,
    ruby_code: ruby_code,
    execution_result: execution_result,
    error_message: error_message,
    # Successful executions report an empty string rather than nil (see
    # execute_ruby_code_safely), so a bare nil check would flag every
    # iteration as failed.
    success: error_message.nil? || error_message.empty?
  })
end
|
366
|
+
|
367
|
+
sig { params(iterations_count: Integer, final_answer: T.nilable(String), history: T::Array[CodeActHistoryEntry]).void }
|
368
|
+
# Emits the 'dspy.codeact.max_iterations' event when the loop used up its
# iteration budget without producing a final answer; otherwise does nothing.
#
# @param iterations_count [Integer] rounds executed
# @param final_answer [String, nil] answer found, if any
# @param history [Array] entries recorded during the run
def handle_max_iterations_if_needed(iterations_count, final_answer, history)
  budget_exhausted = iterations_count >= @max_iterations && final_answer.nil?
  return unless budget_exhausted

  Instrumentation.emit('dspy.codeact.max_iterations', {
    iteration_count: iterations_count,
    max_iterations: @max_iterations,
    final_history_length: history.length
  })
end
|
377
|
+
|
378
|
+
sig { returns(String) }
|
379
|
+
# Fallback answer text used when the loop ends without a final answer; it
# mentions the configured iteration limit.
#
# @return [String]
def default_no_answer_message
  "No solution reached within #{@max_iterations} iterations"
end
|
382
|
+
|
383
|
+
# Safe Ruby code execution method - placeholder for now
|
384
|
+
sig { params(ruby_code: String).returns([T.nilable(String), String]) }
|
385
|
+
# Evaluates +ruby_code+ and reports the outcome as a [result, error] pair.
#
# While the code runs, $stdout is replaced by a StringIO so printed output can
# be captured; the original stream is always restored afterwards.
#
# SECURITY: despite the name this is NOT a sandbox. The code is passed to
# `eval` with this method's binding and runs with the full privileges of the
# host process. Only use it with code you are prepared to execute.
# TODO: Implement proper sandboxing in Phase 2
#
# @param ruby_code [String] Ruby source to evaluate
# @return [Array] `[output, ""]` on success, where output is the captured
#   stdout (trailing newline removed) or, when nothing was printed, the
#   evaluated value converted with #to_s; `[nil, "Error: <message>"]` when
#   evaluation raised
def execute_ruby_code_safely(ruby_code)
  original_stdout = $stdout
  captured_output = StringIO.new
  $stdout = captured_output

  result = eval(ruby_code, binding)

  # Prefer what the code printed; fall back to the value it evaluated to.
  output = captured_output.string
  [output.empty? ? result.to_s : output.chomp, ""]
rescue ScriptError, StandardError => e
  # ScriptError covers SyntaxError as well as LoadError and NotImplementedError,
  # none of which are StandardErrors; without it, generated code that requires a
  # missing library would abort the whole agent loop instead of being reported
  # back as an execution error.
  [nil, "Error: #{e.message}"]
ensure
  $stdout = original_stdout if original_stdout
end
|
414
|
+
|
415
|
+
sig { params(output: T.untyped).void }
|
416
|
+
# Checks that +output+ is a well-formed CodeAct result and raises a
# RuntimeError describing the first problem found.
#
# Checks, in order: the object is an instance of the enhanced output struct;
# it responds to every output field of the original signature; it exposes
# history (Array), iterations (Integer) and execution_context (Hash).
#
# @param output [Object] the value to validate
# @raise [RuntimeError] when any check fails
def validate_output_schema!(output)
  unless output.is_a?(@enhanced_output_struct)
    raise "Output must be an instance of #{@enhanced_output_struct}, got #{output.class}"
  end

  # Fields declared by the original signature must be present.
  @original_signature_class.output_struct_class.props.each_key do |field_name|
    raise "Missing required field: #{field_name}" unless output.respond_to?(field_name)
  end

  # CodeAct-specific fields: [reader, expected class, failure message].
  codeact_checks = [
    [:history, Array, "Missing or invalid history field"],
    [:iterations, Integer, "Missing or invalid iterations field"],
    [:execution_context, Hash, "Missing or invalid execution_context field"]
  ]
  codeact_checks.each do |reader, expected_class, failure_message|
    valid = output.respond_to?(reader) && output.public_send(reader).is_a?(expected_class)
    raise failure_message unless valid
  end

  nil
end
|
442
|
+
|
443
|
+
sig { returns(T::Hash[Symbol, T.untyped]) }
|
444
|
+
# Returns a fixed sample of the CodeAct-specific output fields: a one-step
# history, an iteration count of 1, and an execution context.
#
# @return [Hash] with keys :history, :iterations and :execution_context
def generate_example_output
  sample_step = {
    step: 1,
    thought: "I need to write Ruby code to solve this task...",
    ruby_code: "result = 2 + 2",
    execution_result: "4",
    error_message: nil
  }

  {
    history: [sample_step],
    iterations: 1,
    execution_context: { result: 4 }
  }
end
|
462
|
+
end
|
463
|
+
end
|
data/lib/dspy/instrumentation.rb
CHANGED
@@ -47,6 +47,13 @@ module DSPy
|
|
47
47
|
n.register_event('dspy.react.iteration_complete')
|
48
48
|
n.register_event('dspy.react.max_iterations')
|
49
49
|
|
50
|
+
# CodeAct events
|
51
|
+
n.register_event('dspy.codeact')
|
52
|
+
n.register_event('dspy.codeact.iteration')
|
53
|
+
n.register_event('dspy.codeact.code_execution')
|
54
|
+
n.register_event('dspy.codeact.iteration_complete')
|
55
|
+
n.register_event('dspy.codeact.max_iterations')
|
56
|
+
|
50
57
|
# Evaluation events
|
51
58
|
n.register_event('dspy.evaluation.start')
|
52
59
|
n.register_event('dspy.evaluation.example')
|
@@ -80,6 +87,14 @@ module DSPy
|
|
80
87
|
n.register_event('dspy.storage.import')
|
81
88
|
n.register_event('dspy.storage.cleanup')
|
82
89
|
|
90
|
+
# Memory compaction events
|
91
|
+
n.register_event('dspy.memory.compaction_check')
|
92
|
+
n.register_event('dspy.memory.size_compaction')
|
93
|
+
n.register_event('dspy.memory.age_compaction')
|
94
|
+
n.register_event('dspy.memory.deduplication')
|
95
|
+
n.register_event('dspy.memory.relevance_pruning')
|
96
|
+
n.register_event('dspy.memory.compaction_complete')
|
97
|
+
|
83
98
|
# Registry events
|
84
99
|
n.register_event('dspy.registry.register_start')
|
85
100
|
n.register_event('dspy.registry.register_complete')
|
@@ -14,6 +14,9 @@ module DSPy
|
|
14
14
|
# Anthropic requires system message to be separate from messages
|
15
15
|
system_message, user_messages = extract_system_message(normalize_messages(messages))
|
16
16
|
|
17
|
+
# Apply JSON prefilling if needed for better Claude JSON compliance
|
18
|
+
user_messages = prepare_messages_for_json(user_messages, system_message)
|
19
|
+
|
17
20
|
request_params = {
|
18
21
|
model: model,
|
19
22
|
messages: user_messages,
|
@@ -77,6 +80,109 @@ module DSPy
|
|
77
80
|
|
78
81
|
private
|
79
82
|
|
83
|
+
# Enhanced JSON extraction specifically for Claude models
|
84
|
+
# Handles multiple patterns of markdown-wrapped JSON responses
|
85
|
+
# Pulls the JSON payload out of a model response that may wrap it in markdown.
#
# Tried in order:
#   1. a ```json fenced block,
#   2. the text after a "## Output values" header (fence markers removed),
#   3. a generic ``` fenced block whose body looks like JSON,
#   4. the whole content, stripped.
#
# @param content [String, nil] raw response text
# @return [String, nil] the extracted text; nil/empty input is returned as is
def extract_json_from_response(content)
  return content if content.nil? || content.empty?

  # 1. ```json blocks
  json_fence = content.include?('```json') && content.match(/```json\s*\n(.*?)\n```/m)
  return json_fence[1].strip if json_fence

  # 2. "## Output values" header
  if content.include?('## Output values')
    after_header = content.split('## Output values').last
    without_fences = after_header.gsub(/```json\s*\n/, '').gsub(/\n```.*/, '').strip
    return without_fences if without_fences && !without_fences.empty?
  end

  # 3. Generic code blocks, accepted only when the body looks like JSON
  generic_fence = content.include?('```') && content.match(/```\s*\n(.*?)\n```/m)
  return generic_fence[1].strip if generic_fence && looks_like_json?(generic_fence[1])

  # 4. Already plain JSON, or nothing recognisable: hand back the trimmed text
  content.strip
end
|
112
|
+
|
113
|
+
# Simple heuristic to check if content looks like JSON
|
114
|
+
# Cheap shape check: after trimming whitespace, does the text start and end
# with a matching pair of braces or square brackets? No parsing is attempted.
#
# @param str [String, nil]
# @return [Boolean]
def looks_like_json?(str)
  return false if str.nil? || str.empty?

  candidate = str.strip
  [['{', '}'], ['[', ']']].any? do |opening, closing|
    candidate.start_with?(opening) && candidate.end_with?(closing)
  end
end
|
120
|
+
|
121
|
+
# Prepare messages for JSON output by adding prefilling and strong instructions
|
122
|
+
# Adjusts the outgoing messages when the conversation appears to want JSON and
# the model is one that tends to wrap JSON in markdown: the last message gets
# an explicit JSON-only instruction. Otherwise the messages pass through
# untouched.
#
# Assistant prefilling is currently switched off, so `add_json_prefill` is
# never reached.
#
# @param user_messages [Array<Hash>] messages without the system message
# @param system_message [String, nil]
# @return [Array<Hash>]
def prepare_messages_for_json(user_messages, system_message)
  wants_json = requires_json_output?(user_messages, system_message)
  return user_messages unless wants_json && tends_to_wrap_json?

  enhanced_messages = enhance_json_instructions(user_messages)

  prefill_enabled = false # supports_prefilling? - temporarily disabled
  prefill_enabled ? add_json_prefill(enhanced_messages) : enhanced_messages
end
|
136
|
+
|
137
|
+
# Detect if the conversation requires JSON output
|
138
|
+
# Heuristic: does any message (system message included) hint at JSON output?
# A text counts when it contains "json" in any letter case, a code fence, an
# opening brace, or the lowercase word "output". Nil contents are ignored.
#
# @param user_messages [Array<Hash>] hashes read through their :content key
# @param system_message [String, nil]
# @return [Boolean]
def requires_json_output?(user_messages, system_message)
  texts = user_messages.map { |message| message[:content] }.unshift(system_message).compact

  texts.any? do |text|
    text.downcase.include?('json') || ['```', '{', 'output'].any? { |marker| text.include?(marker) }
  end
end
|
148
|
+
|
149
|
+
# Check if this is a Claude model that benefits from prefilling
|
150
|
+
# Whether the configured model name contains "claude" (case-insensitive);
# such models are treated as candidates for JSON prefilling.
#
# @return [Boolean]
def supports_prefilling?
  normalized_name = model.downcase
  normalized_name.include?('claude')
end
|
154
|
+
|
155
|
+
# Check if this is a Claude model that tends to wrap JSON in markdown
|
156
|
+
# Whether the configured model name contains "claude" (case-insensitive);
# such models are treated as likely to wrap JSON answers in markdown.
#
# @return [Boolean]
def tends_to_wrap_json?
  normalized_name = model.downcase
  normalized_name.include?('claude')
end
|
160
|
+
|
161
|
+
# Enhance the last user message with strong JSON instructions
|
162
|
+
# Returns the messages with a JSON-only instruction appended to the content of
# the last one, unless that content already contains 'ONLY valid JSON'.
#
# The input array and its hashes are not modified: a new array is returned and
# the last message is copied before being changed. An empty input is returned
# as is.
#
# @param user_messages [Array<Hash>] hashes read through their :content key
# @return [Array<Hash>]
def enhance_json_instructions(user_messages)
  return user_messages if user_messages.empty?

  *earlier_messages, final_message = user_messages

  # Nothing to add when the instruction is already there.
  return user_messages.dup if final_message[:content].include?('ONLY valid JSON')

  # Use smart default instruction for Claude models
  json_instruction = "\n\nIMPORTANT: Respond with ONLY valid JSON. No markdown formatting, no code blocks, no explanations. Start your response with '{' and end with '}'."

  patched_message = final_message.dup.tap do |message|
    message[:content] = message[:content] + json_instruction
  end

  earlier_messages << patched_message
end
|
180
|
+
|
181
|
+
# Add assistant message prefill to guide Claude
|
182
|
+
# Returns a new message list ending with an assistant message whose content is
# an opening brace, so the reply continues from "{". The input is not modified.
#
# @param user_messages [Array<Hash>]
# @return [Array<Hash>]
def add_json_prefill(user_messages)
  prefill_message = { role: "assistant", content: "{" }
  [*user_messages, prefill_message]
end
|
185
|
+
|
80
186
|
def extract_system_message(messages)
|
81
187
|
system_message = nil
|
82
188
|
user_messages = []
|
data/lib/dspy/lm.rb
CHANGED
@@ -139,11 +139,16 @@ module DSPy
|
|
139
139
|
# Try to parse the response as JSON
|
140
140
|
content = response.content
|
141
141
|
|
142
|
-
#
|
143
|
-
if
|
144
|
-
content =
|
145
|
-
|
146
|
-
|
142
|
+
# Let adapters handle their own extraction logic if available
|
143
|
+
if adapter && adapter.respond_to?(:extract_json_from_response, true)
|
144
|
+
content = adapter.send(:extract_json_from_response, content)
|
145
|
+
else
|
146
|
+
# Fallback: Extract JSON if it's in a code block (legacy behavior)
|
147
|
+
if content.include?('```json')
|
148
|
+
content = content.split('```json').last.split('```').first.strip
|
149
|
+
elsif content.include?('```')
|
150
|
+
content = content.split('```').last.split('```').first.strip
|
151
|
+
end
|
147
152
|
end
|
148
153
|
|
149
154
|
begin
|
@@ -152,8 +157,17 @@ module DSPy
|
|
152
157
|
# For Sorbet signatures, just return the parsed JSON
|
153
158
|
# The Predict will handle validation
|
154
159
|
json_payload
|
155
|
-
rescue JSON::ParserError
|
156
|
-
|
160
|
+
rescue JSON::ParserError => e
|
161
|
+
# Enhanced error message with debugging information
|
162
|
+
error_details = {
|
163
|
+
original_content: response.content,
|
164
|
+
extracted_content: content,
|
165
|
+
provider: provider,
|
166
|
+
model: model
|
167
|
+
}
|
168
|
+
|
169
|
+
DSPy.logger.debug("JSON parsing failed: #{error_details}")
|
170
|
+
raise "Failed to parse LLM response as JSON: #{e.message}. Original content length: #{response.content&.length || 0} chars"
|
157
171
|
end
|
158
172
|
end
|
159
173
|
end
|