dspy 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 84036f6f01324f792e9de3e85fb90092747a3a021072cb65ef02d0911832dd23
4
- data.tar.gz: 368240d7747e0e381e39d117061aefa55161ec99b9a2dd4813b798f854e907b2
3
+ metadata.gz: e82dded62b8c11ca1ac69c75d0cdffd04e9e58daa5dcb16e66d794dbc37ee49e
4
+ data.tar.gz: 3d58da2a9ff3d76b9e0d5960e9e6df3dc0ac120ef97c99c640f05129d9990a77
5
5
  SHA512:
6
- metadata.gz: fe445ba22336550e234359eea194aa105f204332f117b44c560a9b170ec5f126192647f9edcba09a991fda1f1bcec8144239362cb95698b32b0526efd343d587
7
- data.tar.gz: b47bbd52e43b765009778c110dd58a69eb75873ce1e8a17dc0fe33c388e7c7dfd8cfcb2f848b0bc81102487acba5230a47d7a001501462ea7f9c7a1b0605fc05
6
+ metadata.gz: e25cecdaaf81a6985a1e6cf5bd7a709efc568c89cb687a8e9bd3e593c6e41e352ffe07798275bfca44bf0f61f9b7cc8483c57978b8c56099bf0739f00573f587
7
+ data.tar.gz: c465fb9231b2596b155ff839047b4b1e7a92ad2e40f98f7faabbc5eaa2b632fbd566c8d3e3c0005e0196f46ca3a0b4927fc9f64aa7bd6db262b2ae0c823e4aad
data/README.md CHANGED
@@ -15,6 +15,7 @@ The result? LLM applications that actually scale and don't break when you sneeze
15
15
  - **Predict** - Basic LLM completion with structured data
16
16
  - **Chain of Thought** - Step-by-step reasoning for complex problems
17
17
  - **ReAct** - Tool-using agents with basic tool integration
18
+ - **CodeAct** - Dynamic code execution agents for programming tasks
18
19
  - **Manual Composition** - Combine multiple LLM calls into workflows
19
20
 
20
21
  **Optimization & Evaluation:**
@@ -0,0 +1,463 @@
1
+ # typed: strict
2
+ # frozen_string_literal: true
3
+
4
+ require 'sorbet-runtime'
5
+ require_relative 'predict'
6
+ require_relative 'signature'
7
+ require 'json'
8
+ require 'stringio'
9
+ require_relative 'instrumentation'
10
+ require_relative 'mixins/struct_builder'
11
+ require_relative 'mixins/instrumentation_helpers'
12
+
13
+ module DSPy
14
# One recorded step of a CodeAct run: the step number, the model's thought,
# the Ruby code it produced, and the outcome of executing that code.
class CodeActHistoryEntry < T::Struct
  const :step, Integer
  prop :thought, T.nilable(String)
  prop :ruby_code, T.nilable(String)
  prop :execution_result, T.nilable(String)
  prop :error_message, String

  # Hash form of the entry keyed by symbol, in declaration order.
  # Fields whose value is nil are left out.
  def to_h
    %i[step thought ruby_code execution_result error_message]
      .to_h { |field| [field, public_send(field)] }
      .compact
  end
end
33
+
34
# Defines the signature for Ruby code generation.
#
# Inputs are the task text, a context string, and the list of previous
# CodeActHistoryEntry steps. Outputs are the model's thought, the Ruby code to
# run, and an explanation. Within the CodeAct class in this file only
# `thought` and `ruby_code` are read from the prediction.
class RubyCodeGeneration < DSPy::Signature
  description "Generate Ruby code to solve the given task."

  input do
    const :task, String,
      description: "The task description requiring Ruby code solution"
    const :context, String,
      description: "Available variables and previous results from code execution history"
    const :history, T::Array[CodeActHistoryEntry],
      description: "Previous thoughts and code executions with their results. Use this to understand what has been tried and what variables are available."
  end

  output do
    const :thought, String,
      description: "Reasoning about the approach to solve the task with Ruby code"
    const :ruby_code, String,
      description: "Ruby code to execute. This should be valid Ruby code that can be evaluated safely. Avoid system calls, file operations, or other potentially dangerous operations."
    const :explanation, String,
      description: "Brief explanation of what the code does and why this approach was chosen"
  end
end
56
+
57
# Decision produced by the observation step: keep iterating or stop.
# The serialized values are the strings "continue" and "finish".
class CodeActNextStep < T::Enum
  enums do
    Continue = new("continue")
    Finish = new("finish")
  end
end
63
+
64
# Defines the signature for processing code execution results.
#
# Inputs are the original task, the history of previous steps, and the
# result / error message of the latest execution. Outputs are a free-text
# observation, the next step (a CodeActNextStep), and an optional final
# answer.
class RubyCodeObservation < DSPy::Signature
  description "Process the result of Ruby code execution and decide what to do next."

  input do
    const :task, String,
      description: "The original task"
    const :history, T::Array[CodeActHistoryEntry],
      description: "Previous thoughts, code executions, and their results"
    const :execution_result, T.nilable(String),
      description: "The result from executing the Ruby code"
    const :error_message, String,
      description: "Error message if the code execution failed (empty string if no error)"
  end

  output do
    const :observation, String,
      description: "Analysis of the execution result and what it means for solving the task"
    # Interpolate the serialized enum values ("continue" / "finish").
    # Interpolating the enum object itself goes through T::Enum#to_s, which
    # yields the inspect-style "#<...::Continue>" rather than the value the
    # model is expected to answer with.
    const :next_step, CodeActNextStep,
      description: "What to do next: '#{CodeActNextStep::Continue.serialize}' to continue with more code or '#{CodeActNextStep::Finish.serialize}' if the task is complete"
    const :final_answer, T.nilable(String),
      description: "If next_step is 'finish', provide the final answer to the task based on the execution results"
  end
end
88
+
89
+ # CodeAct Agent using Think-Code-Observe pattern
90
+ class CodeAct < Predict
91
+ extend T::Sig
92
+ include Mixins::StructBuilder
93
+ include Mixins::InstrumentationHelpers
94
+
95
+ sig { returns(T.class_of(DSPy::Signature)) }
96
+ attr_reader :original_signature_class
97
+
98
+ sig { returns(T.class_of(T::Struct)) }
99
+ attr_reader :enhanced_output_struct
100
+
101
+ sig { returns(Integer) }
102
+ attr_reader :max_iterations
103
+
104
+ sig { returns(T::Hash[Symbol, T.untyped]) }
105
+ attr_reader :execution_context
106
+
107
+ sig { params(signature_class: T.class_of(DSPy::Signature), max_iterations: Integer).void }
108
# Builds a CodeAct agent around +signature_class+.
#
# Stores the caller's signature and iteration limit, creates the two internal
# predictors (code generation and observation), derives an output struct that
# adds the CodeAct fields, and passes Predict an anonymous signature class
# that reuses the caller's input struct together with that enhanced output
# struct.
#
# @param signature_class [Class] DSPy::Signature subclass describing the task
# @param max_iterations [Integer] upper bound on loop iterations (default 10)
def initialize(signature_class, max_iterations: 10)
  @original_signature_class = signature_class
  @max_iterations = max_iterations
  @execution_context = T.let({}, T::Hash[Symbol, T.untyped])

  # Create code generator using Predict to preserve field descriptions
  @code_generator = T.let(DSPy::Predict.new(RubyCodeGeneration), DSPy::Predict)

  # Create observation processor using Predict to preserve field descriptions
  @observation_processor = T.let(DSPy::Predict.new(RubyCodeObservation), DSPy::Predict)

  # Create enhanced output struct with CodeAct fields
  @enhanced_output_struct = create_enhanced_output_struct(signature_class)
  # Local copy so the value can be captured by the Class.new block below,
  # where instance variables refer to the new class rather than this object.
  enhanced_output_struct = @enhanced_output_struct

  # Create enhanced signature class
  enhanced_signature = Class.new(DSPy::Signature) do
    # Set the description
    description signature_class.description

    # Use the same input struct
    @input_struct_class = signature_class.input_struct_class

    # Use the enhanced output struct with CodeAct fields
    @output_struct_class = enhanced_output_struct

    # Expose the two class-level instance variables set above.
    class << self
      attr_reader :input_struct_class, :output_struct_class
    end
  end

  # Call parent constructor with enhanced signature
  super(enhanced_signature)
end
142
+
143
+ sig { params(kwargs: T.untyped).returns(T.untyped).override }
144
# Runs the CodeAct agent for one set of inputs.
#
# The keyword arguments are validated by instantiating the original
# signature's input struct. The first serialized input value is used as the
# task text and must be a String (enforced by T.cast). The whole run is
# wrapped in the 'dspy.codeact' instrumentation event.
#
# @param kwargs [Hash] input fields of the original signature
# @return the enhanced output struct built by create_enhanced_result
def forward(**kwargs)
  instrument_prediction('dspy.codeact', @original_signature_class, kwargs, {
    max_iterations: @max_iterations
  }) do
    # Validate input and extract task
    input_struct = @original_signature_class.input_struct_class.new(**kwargs)
    task = T.cast(input_struct.serialize.values.first, String)

    # Execute CodeAct reasoning loop
    reasoning_result = execute_codeact_reasoning_loop(task)

    # Create enhanced output with all CodeAct data
    create_enhanced_result(kwargs, reasoning_result)
  end
end
164
+
165
+ private
166
+
167
+ # Executes the main CodeAct reasoning loop (Think-Code-Observe)
168
+ sig { params(task: String).returns(T::Hash[Symbol, T.untyped]) }
169
# Drives the Think-Code-Observe loop until an iteration reports a final
# answer or should_continue_iteration? says to stop.
#
# @param task [String] task text handed to every iteration
# @return [Hash] :history, :iterations, :final_answer (falls back to
#   default_no_answer_message) and :execution_context
def execute_codeact_reasoning_loop(task)
  history = T.let([], T::Array[CodeActHistoryEntry])
  final_answer = T.let(nil, T.nilable(String))
  context = ""
  iterations_count = 0

  while should_continue_iteration?(iterations_count, final_answer)
    iterations_count += 1
    outcome = execute_single_iteration(task, history, context, iterations_count)

    if outcome[:should_finish]
      final_answer = outcome[:final_answer]
      break
    end

    # Carry the updated history and context into the next iteration.
    history, context = outcome.values_at(:history, :context)
  end

  handle_max_iterations_if_needed(iterations_count, final_answer, history)

  answer = final_answer || default_no_answer_message
  {
    history: history,
    iterations: iterations_count,
    final_answer: answer,
    execution_context: @execution_context
  }
end
200
+
201
+ # Executes a single iteration of the Think-Code-Observe loop
202
+ sig { params(task: String, history: T::Array[CodeActHistoryEntry], context: String, iteration: Integer).returns(T::Hash[Symbol, T.untyped]) }
203
# Runs one Think-Code-Observe iteration inside the
# 'dspy.codeact.iteration' instrumentation event.
#
# @param task [String] task text
# @param history [Array] previous steps (appended to by the think/code step)
# @param context [String] context string built from earlier results
# @param iteration [Integer] 1-based iteration number
# @return [Hash] { should_finish: true, final_answer: } when the observation
#   step decides to stop, otherwise the hash from finalize_iteration
def execute_single_iteration(task, history, context, iteration)
  Instrumentation.instrument('dspy.codeact.iteration', {
    iteration: iteration,
    max_iterations: @max_iterations,
    history_length: history.length
  }) do
    execution_state = execute_think_code_step(task, context, history, iteration)

    observation_decision = process_observation_and_decide_next_step(
      task, execution_state[:history], execution_state[:execution_result],
      execution_state[:error_message], iteration
    )

    # The finishing result is produced as the block's value rather than via
    # `return`: a `return` here would unwind straight through
    # Instrumentation.instrument and skip whatever it does after the yield.
    if observation_decision[:should_finish]
      { should_finish: true, final_answer: observation_decision[:final_answer] }
    else
      finalize_iteration(execution_state, iteration)
    end
  end
end
223
+
224
+ # Executes the Think-Code step: generates code and executes it
225
+ sig { params(task: String, context: String, history: T::Array[CodeActHistoryEntry], iteration: Integer).returns(T::Hash[Symbol, T.untyped]) }
226
# Think + Code: asks the code generator for a thought and Ruby code, executes
# the code, and appends the outcome to +history+ (mutated in place).
#
# @return [Hash] :history, :thought, :ruby_code, :execution_result, :error_message
def execute_think_code_step(task, context, history, iteration)
  prompt_context = context.empty? ? "No previous context available." : context
  code_obj = @code_generator.forward(task: task, context: prompt_context, history: history)
  thought = code_obj.thought
  ruby_code = code_obj.ruby_code

  execution_result, error_message = execute_ruby_code_with_instrumentation(ruby_code, iteration)

  history << create_history_entry(iteration, thought, ruby_code, execution_result, error_message)

  {
    history: history,
    thought: thought,
    ruby_code: ruby_code,
    execution_result: execution_result,
    error_message: error_message
  }
end
250
+
251
+ # Finalizes iteration by updating context and emitting events
252
+ sig { params(execution_state: T::Hash[Symbol, T.untyped], iteration: Integer).returns(T::Hash[Symbol, T.untyped]) }
253
# Wraps up a non-final iteration: rebuilds the context string from history
# and emits the iteration-complete event.
#
# @return [Hash] { should_finish: false, history:, context: }
def finalize_iteration(execution_state, iteration)
  history = execution_state[:history]
  new_context = build_context_from_history(history)

  step_details = execution_state.values_at(:thought, :ruby_code, :execution_result, :error_message)
  emit_iteration_complete_event(iteration, *step_details)

  { should_finish: false, history: history, context: new_context }
end
267
+
268
+ # Creates enhanced output struct with CodeAct-specific fields
269
+ sig { params(signature_class: T.class_of(DSPy::Signature)).returns(T.class_of(T::Struct)) }
270
# Builds the output struct class for this agent: the original signature's
# input and output props plus the CodeAct fields history, iterations and
# execution_context (delegates to build_enhanced_struct).
def create_enhanced_output_struct(signature_class)
  base_props = {
    input: signature_class.input_struct_class.props,
    output: signature_class.output_struct_class.props
  }
  codeact_fields = {
    history: [T::Array[T::Hash[Symbol, T.untyped]], "CodeAct execution history"],
    iterations: [Integer, "Number of iterations executed"],
    execution_context: [T::Hash[Symbol, T.untyped], "Variables and context from code execution"]
  }

  build_enhanced_struct(base_props, codeact_fields)
end
283
+
284
+ # Creates enhanced result struct
285
+ sig { params(input_kwargs: T::Hash[Symbol, T.untyped], reasoning_result: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
286
# Instantiates the enhanced output struct from the caller's inputs, the
# CodeAct bookkeeping fields, and the final answer. The answer is stored
# under the first output field of the original signature.
def create_enhanced_result(input_kwargs, reasoning_result)
  answer_field = @original_signature_class.output_struct_class.props.keys.first
  codeact_fields = {
    history: reasoning_result[:history].map(&:to_h),
    iterations: reasoning_result[:iterations],
    execution_context: reasoning_result[:execution_context]
  }

  attributes = input_kwargs
    .merge(codeact_fields)
    .merge(answer_field => reasoning_result[:final_answer])

  @enhanced_output_struct.new(**attributes)
end
298
+
299
+ # Helper methods for CodeAct logic
300
+ sig { params(iterations_count: Integer, final_answer: T.nilable(String)).returns(T::Boolean) }
301
# True while no final answer exists and the iteration budget is not used up
# (a nil @max_iterations means no budget limit).
def should_continue_iteration?(iterations_count, final_answer)
  return false unless final_answer.nil?
  return true if @max_iterations.nil?

  iterations_count < @max_iterations
end
304
+
305
+ sig { params(ruby_code: String, iteration: Integer).returns([T.nilable(String), String]) }
306
# Executes +ruby_code+ via execute_ruby_code_safely inside the
# 'dspy.codeact.code_execution' instrumentation event, recording the
# iteration number and the code length.
#
# @return [Array] the [result, error_message] pair from the block
def execute_ruby_code_with_instrumentation(ruby_code, iteration)
  payload = { iteration: iteration, code_length: ruby_code.length }
  Instrumentation.instrument('dspy.codeact.code_execution', payload) { execute_ruby_code_safely(ruby_code) }
end
314
+
315
+ sig { params(step: Integer, thought: String, ruby_code: String, execution_result: T.nilable(String), error_message: String).returns(CodeActHistoryEntry) }
316
# Packs one step's data into a CodeActHistoryEntry.
def create_history_entry(step, thought, ruby_code, execution_result, error_message)
  attributes = {
    step: step,
    thought: thought,
    ruby_code: ruby_code,
    execution_result: execution_result,
    error_message: error_message
  }
  CodeActHistoryEntry.new(**attributes)
end
325
+
326
+ sig { params(task: String, history: T::Array[CodeActHistoryEntry], execution_result: T.nilable(String), error_message: String, iteration: Integer).returns(T::Hash[Symbol, T.untyped]) }
327
# Observe: feeds the latest execution outcome to the observation predictor
# and turns its decision into a hash.
#
# @return [Hash] { should_finish: false }, or { should_finish: true,
#   final_answer: } where the answer falls back to the execution result and
#   then to "Task completed"
def process_observation_and_decide_next_step(task, history, execution_result, error_message, iteration)
  observation_result = @observation_processor.forward(
    task: task,
    history: history,
    execution_result: execution_result,
    error_message: error_message
  )

  if observation_result.next_step == CodeActNextStep::Finish
    answer = observation_result.final_answer || execution_result || "Task completed"
    { should_finish: true, final_answer: answer }
  else
    { should_finish: false }
  end
end
341
+
342
+ sig { params(history: T::Array[CodeActHistoryEntry]).returns(String) }
343
# Builds the context string: one "Step N result: ..." line for every history
# entry that has a non-empty execution result, joined with newlines.
def build_context_from_history(history)
  history
    .reject { |entry| entry.execution_result.nil? || entry.execution_result.empty? }
    .map { |entry| "Step #{entry.step} result: #{entry.execution_result}" }
    .join("\n")
end
354
+
355
+ sig { params(iteration: Integer, thought: String, ruby_code: String, execution_result: T.nilable(String), error_message: T.nilable(String)).void }
356
# Emits the 'dspy.codeact.iteration_complete' event for one finished step.
#
# :success is true when there is no error message. Code execution reports
# "no error" as an empty string (not nil), so both nil and "" count as
# success.
def emit_iteration_complete_event(iteration, thought, ruby_code, execution_result, error_message)
  Instrumentation.emit('dspy.codeact.iteration_complete', {
    iteration: iteration,
    thought: thought,
    ruby_code: ruby_code,
    execution_result: execution_result,
    error_message: error_message,
    success: error_message.nil? || error_message.empty?
  })
end
366
+
367
+ sig { params(iterations_count: Integer, final_answer: T.nilable(String), history: T::Array[CodeActHistoryEntry]).void }
368
# Emits 'dspy.codeact.max_iterations' when the loop used its whole iteration
# budget without producing a final answer; otherwise does nothing.
def handle_max_iterations_if_needed(iterations_count, final_answer, history)
  return if iterations_count < @max_iterations
  return unless final_answer.nil?

  payload = {
    iteration_count: iterations_count,
    max_iterations: @max_iterations,
    final_history_length: history.length
  }
  Instrumentation.emit('dspy.codeact.max_iterations', payload)
end
377
+
378
+ sig { returns(String) }
379
# Fallback final answer used when the loop ends without one.
def default_no_answer_message
  limit = @max_iterations
  "No solution reached within #{limit} iterations"
end
382
+
383
+ # Safe Ruby code execution method - placeholder for now
384
+ sig { params(ruby_code: String).returns([T.nilable(String), String]) }
385
# Evaluates +ruby_code+ and reports the outcome as a pair.
#
# $stdout is redirected to a StringIO for the duration of the call. If the
# code printed anything, the printed text (chomped) is the result, otherwise
# the evaluated value's to_s is. The previous $stdout is always restored.
#
# SECURITY: despite the name there is no sandbox here. The code is run with
# plain `eval` against this method's own binding, so it has this object's
# privileges and can read or reassign the locals below. Do not feed it
# untrusted input until real sandboxing exists.
# TODO: Implement proper sandboxing in Phase 2
#
# @param ruby_code [String] source to evaluate
# @return [Array] [result, ""] on success, [nil, "Error: <message>"] on failure
def execute_ruby_code_safely(ruby_code)
  # Capture stdout to get print/puts output
  original_stdout = $stdout
  captured_output = StringIO.new
  $stdout = captured_output

  result = eval(ruby_code, binding)

  # If there's captured output, use it, otherwise use the eval result
  output = captured_output.string
  [output.empty? ? result.to_s : output.chomp, ""]
rescue StandardError, ScriptError => e
  # ScriptError covers SyntaxError as well as LoadError and
  # NotImplementedError, which a bare `rescue` would let escape and abort the
  # whole agent run instead of being reported back as a failed step.
  [nil, "Error: #{e.message}"]
ensure
  $stdout = original_stdout if original_stdout
end
414
+
415
+ sig { params(output: T.untyped).void }
416
# Checks that +output+ is an instance of the enhanced output struct, responds
# to every output field of the original signature, and carries correctly
# typed history / iterations / execution_context fields.
#
# @raise [RuntimeError] describing the first violation found
def validate_output_schema!(output)
  expected_struct = @enhanced_output_struct
  unless output.is_a?(expected_struct)
    raise "Output must be an instance of #{expected_struct}, got #{output.class}"
  end

  # Fields declared by the original signature
  @original_signature_class.output_struct_class.props.each_key do |field_name|
    raise "Missing required field: #{field_name}" unless output.respond_to?(field_name)
  end

  # CodeAct-specific fields and the type each must have
  { history: Array, iterations: Integer, execution_context: Hash }.each do |field, type|
    next if output.respond_to?(field) && output.public_send(field).is_a?(type)

    raise "Missing or invalid #{field} field"
  end

  nil
end
442
+
443
+ sig { returns(T::Hash[Symbol, T.untyped]) }
444
# Returns a fixed example of the CodeAct-specific output fields: a one-step
# history, an iteration count of 1, and a sample execution context.
def generate_example_output
  sample_step = {
    step: 1,
    thought: "I need to write Ruby code to solve this task...",
    ruby_code: "result = 2 + 2",
    execution_result: "4",
    error_message: nil
  }

  {
    history: [sample_step],
    iterations: 1,
    execution_context: { result: 4 }
  }
end
462
+ end
463
+ end
@@ -47,6 +47,13 @@ module DSPy
47
47
  n.register_event('dspy.react.iteration_complete')
48
48
  n.register_event('dspy.react.max_iterations')
49
49
 
50
+ # CodeAct events
51
+ n.register_event('dspy.codeact')
52
+ n.register_event('dspy.codeact.iteration')
53
+ n.register_event('dspy.codeact.code_execution')
54
+ n.register_event('dspy.codeact.iteration_complete')
55
+ n.register_event('dspy.codeact.max_iterations')
56
+
50
57
  # Evaluation events
51
58
  n.register_event('dspy.evaluation.start')
52
59
  n.register_event('dspy.evaluation.example')
@@ -80,6 +87,14 @@ module DSPy
80
87
  n.register_event('dspy.storage.import')
81
88
  n.register_event('dspy.storage.cleanup')
82
89
 
90
+ # Memory compaction events
91
+ n.register_event('dspy.memory.compaction_check')
92
+ n.register_event('dspy.memory.size_compaction')
93
+ n.register_event('dspy.memory.age_compaction')
94
+ n.register_event('dspy.memory.deduplication')
95
+ n.register_event('dspy.memory.relevance_pruning')
96
+ n.register_event('dspy.memory.compaction_complete')
97
+
83
98
  # Registry events
84
99
  n.register_event('dspy.registry.register_start')
85
100
  n.register_event('dspy.registry.register_complete')
@@ -14,6 +14,9 @@ module DSPy
14
14
  # Anthropic requires system message to be separate from messages
15
15
  system_message, user_messages = extract_system_message(normalize_messages(messages))
16
16
 
17
+ # Apply JSON prefilling if needed for better Claude JSON compliance
18
+ user_messages = prepare_messages_for_json(user_messages, system_message)
19
+
17
20
  request_params = {
18
21
  model: model,
19
22
  messages: user_messages,
@@ -77,6 +80,109 @@ module DSPy
77
80
 
78
81
  private
79
82
 
83
# Enhanced JSON extraction specifically for Claude models.
# Handles multiple patterns of markdown-wrapped JSON responses, tried in
# order; the first one that yields something is returned:
#   1. a ```json fenced block
#   2. text following a "## Output values" header
#   3. a generic ``` fenced block whose contents look like JSON
#   4. the stripped content itself
#
# Each fence is first matched in its multi-line form (fence markers on their
# own lines) and only then in a lenient form that also accepts a fence written
# on a single line, so JSON that itself contains backticks inside a properly
# formatted block is not cut short.
#
# @param content [String, nil] raw model response
# @return [String, nil] extracted text; nil / "" are returned unchanged
def extract_json_from_response(content)
  return content if content.nil? || content.empty?

  # Pattern 1: ```json blocks
  if content.include?('```json')
    extracted = content[/```json\s*\n(.*?)\n```/m, 1] || content[/```json\s*(.*?)\s*```/m, 1]
    return extracted.strip if extracted
  end

  # Pattern 2: ## Output values header
  if content.include?('## Output values')
    # `split(...).last` is nil when the content is nothing but the header.
    tail = content.split('## Output values').last.to_s
    # Prefer the inside of a fenced block so prose after the closing fence is
    # dropped; otherwise fall back to stripping stray fence markers.
    extracted = tail[/```(?:json)?\s*\n(.*?)\n```/m, 1] ||
                tail.gsub(/```json\s*\n/, '').gsub(/\n```.*/, '')
    extracted = extracted.strip
    return extracted unless extracted.empty?
  end

  # Pattern 3: Generic code blocks (check if it looks like JSON)
  if content.include?('```')
    extracted = content[/```\s*\n(.*?)\n```/m, 1] || content[/```\s*(.*?)\s*```/m, 1]
    return extracted.strip if extracted && looks_like_json?(extracted)
  end

  # Pattern 4: Already valid JSON or fallback
  content.strip
end
112
+
113
# Cheap shape check: true when the stripped text is wrapped in {...} or
# [...]. No parsing is attempted.
def looks_like_json?(str)
  return false if str.nil? || str.empty?

  candidate = str.strip
  [['{', '}'], ['[', ']']].any? do |opening, closing|
    candidate.start_with?(opening) && candidate.end_with?(closing)
  end
end
120
+
121
# Prepare messages for JSON output by adding strong JSON instructions.
#
# Messages are returned untouched unless the conversation appears to ask for
# JSON (requires_json_output?) and the model tends to wrap JSON in markdown
# (tends_to_wrap_json?). Otherwise the last message gets an explicit
# "ONLY valid JSON" instruction appended.
#
# Assistant prefilling (add_json_prefill, gated by supports_prefilling?) is
# deliberately not applied here: it is temporarily disabled for testing.
#
# @param user_messages [Array<Hash>] non-system messages
# @param system_message [String, nil] extracted system prompt
# @return [Array<Hash>] messages to send
def prepare_messages_for_json(user_messages, system_message)
  return user_messages unless requires_json_output?(user_messages, system_message)
  return user_messages unless tends_to_wrap_json?

  # Add strong JSON instruction to the last user message if not already present
  enhance_json_instructions(user_messages)
end
136
+
137
# Detect if the conversation requires JSON output.
#
# Keyword heuristic over the system message and every message's :content: a
# text triggers when it mentions "json" (any case) or contains a code fence,
# an opening brace, or the word "output". Only String contents are
# inspected; nil or structured (non-String) contents are skipped instead of
# raising.
#
# @param user_messages [Array<Hash>] messages with a :content entry
# @param system_message [String, nil]
# @return [Boolean]
def requires_json_output?(user_messages, system_message)
  texts = [system_message, *user_messages.map { |m| m[:content] }].grep(String)

  texts.any? do |content|
    content.downcase.include?('json') ||
      content.include?('```') ||
      content.include?('{') ||
      content.include?('output')
  end
end
148
+
149
# Whether the configured model is a Claude model (name contains "claude",
# case-insensitively), i.e. one that works well with JSON prefilling.
def supports_prefilling?
  normalized_model = model.downcase
  normalized_model.include?('claude')
end
154
+
155
# Whether the configured model is a Claude model (name contains "claude",
# case-insensitively); these are treated as prone to wrapping JSON in
# markdown.
def tends_to_wrap_json?
  normalized_model = model.downcase
  normalized_model.include?('claude')
end
160
+
161
# Enhance the last message with strong JSON instructions.
#
# Returns a new array; neither the input array nor its message hashes are
# mutated. The instruction is appended only when the last message has String
# content that does not already contain "ONLY valid JSON". A last message
# with nil or non-String content is left as is, since a text suffix cannot be
# appended to it.
#
# @param user_messages [Array<Hash>] messages with a :content entry
# @return [Array<Hash>]
def enhance_json_instructions(user_messages)
  return user_messages if user_messages.empty?

  last_message = user_messages.last
  content = last_message[:content]

  # Nothing to do: content is not text, or the instruction is already there.
  return user_messages.dup unless content.is_a?(String)
  return user_messages.dup if content.include?('ONLY valid JSON')

  # Use smart default instruction for Claude models
  json_instruction = "\n\nIMPORTANT: Respond with ONLY valid JSON. No markdown formatting, no code blocks, no explanations. Start your response with '{' and end with '}'."

  user_messages[0...-1] + [last_message.merge(content: content + json_instruction)]
end
180
+
181
# Returns a new message list ending with an assistant turn whose content is
# "{", prefilling the start of a JSON object. The input is not modified.
def add_json_prefill(user_messages)
  prefill = { role: "assistant", content: "{" }
  [*user_messages, prefill]
end
185
+
80
186
  def extract_system_message(messages)
81
187
  system_message = nil
82
188
  user_messages = []
data/lib/dspy/lm.rb CHANGED
@@ -139,11 +139,16 @@ module DSPy
139
139
  # Try to parse the response as JSON
140
140
  content = response.content
141
141
 
142
- # Extract JSON if it's in a code block
143
- if content.include?('```json')
144
- content = content.split('```json').last.split('```').first.strip
145
- elsif content.include?('```')
146
- content = content.split('```').last.split('```').first.strip
142
+ # Let adapters handle their own extraction logic if available
143
+ if adapter && adapter.respond_to?(:extract_json_from_response, true)
144
+ content = adapter.send(:extract_json_from_response, content)
145
+ else
146
+ # Fallback: Extract JSON if it's in a code block (legacy behavior)
147
+ if content.include?('```json')
148
+ content = content.split('```json').last.split('```').first.strip
149
+ elsif content.include?('```')
150
+ content = content.split('```').last.split('```').first.strip
151
+ end
147
152
  end
148
153
 
149
154
  begin
@@ -152,8 +157,17 @@ module DSPy
152
157
  # For Sorbet signatures, just return the parsed JSON
153
158
  # The Predict will handle validation
154
159
  json_payload
155
- rescue JSON::ParserError
156
- raise "Failed to parse LLM response as JSON: #{content}"
160
+ rescue JSON::ParserError => e
161
+ # Enhanced error message with debugging information
162
+ error_details = {
163
+ original_content: response.content,
164
+ extracted_content: content,
165
+ provider: provider,
166
+ model: model
167
+ }
168
+
169
+ DSPy.logger.debug("JSON parsing failed: #{error_details}")
170
+ raise "Failed to parse LLM response as JSON: #{e.message}. Original content length: #{response.content&.length || 0} chars"
157
171
  end
158
172
  end
159
173
  end