dspy 0.27.6 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,132 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "sorbet-runtime"
4
- require "async"
5
-
6
module DSPy
  class LM
    # Runs an LM call through a chain of JSON-extraction strategies.
    #
    # Each strategy is attempted, retried with exponential backoff up to a
    # per-strategy limit, and then abandoned in favour of the next strategy in
    # the chain. The last error seen is re-raised once every strategy failed.
    class RetryHandler
      extend T::Sig

      MAX_RETRIES = 3
      BACKOFF_BASE = 0.5 # seconds
      MAX_BACKOFF = 10.0 # seconds; upper bound for a single backoff pause

      # Strategies that get a single retry and whose retries are logged at
      # debug level instead of warn.
      STRUCTURED_OUTPUT_STRATEGIES = %w[openai_structured_output gemini_structured_output].freeze

      sig { params(adapter: DSPy::LM::Adapter, signature_class: T.class_of(DSPy::Signature)).void }
      def initialize(adapter, signature_class)
        @adapter = adapter
        @signature_class = signature_class
        @attempt = 0
      end

      # Execute a block with retry logic and progressive fallback.
      #
      # The block receives the strategy to use and its return value is returned
      # unchanged on the first success. When
      # DSPy.config.structured_outputs.retry_enabled is falsy the block runs
      # exactly once with +initial_strategy+ and any error propagates.
      #
      # Raises the last error raised by the block once every strategy in the
      # fallback chain has been exhausted.
      sig do
        type_parameters(:T)
          .params(
            initial_strategy: Strategies::BaseStrategy,
            block: T.proc.params(strategy: Strategies::BaseStrategy).returns(T.type_parameter(:T))
          )
          .returns(T.type_parameter(:T))
      end
      def with_retry(initial_strategy, &block)
        # Skip retries entirely if disabled
        return yield(initial_strategy) unless DSPy.config.structured_outputs.retry_enabled

        last_error = nil

        build_fallback_chain(initial_strategy).each do |strategy|
          retry_limit = max_retries_for_strategy(strategy)
          retry_count = 0

          begin
            @attempt += 1
            DSPy.logger.debug("Attempting with strategy: #{strategy.name} (attempt #{@attempt})")

            result = yield(strategy)

            # Success! Reset attempt counter for next time
            @attempt = 0
            return result
          rescue StandardError => e
            # JSON::ParserError is a StandardError, so one clause covers both
            # and avoids referencing a constant this file never requires.
            last_error = e

            # Let the strategy decide first: true means "give up on me now".
            if strategy.handle_error(e)
              DSPy.logger.debug("Strategy #{strategy.name} handled error, trying next strategy")
              next
            end

            if retry_count >= retry_limit
              DSPy.logger.info("Max retries reached for #{strategy.name}, trying next strategy")
              next
            end

            retry_count += 1
            log_retry(strategy, retry_count, retry_limit, e)
            pause(calculate_backoff(retry_count))
            retry
          end
        end

        # All strategies exhausted. Reset the counter so a later call on this
        # handler starts counting from one again.
        total_attempts = @attempt
        @attempt = 0
        DSPy.logger.error("All strategies exhausted after #{total_attempts} total attempts")
        raise last_error || StandardError.new("All JSON extraction strategies failed")
      end

      private

      # Build the ordered list of strategies to try: the requested strategy
      # first, then every other available strategy by descending priority.
      sig { params(initial_strategy: Strategies::BaseStrategy).returns(T::Array[Strategies::BaseStrategy]) }
      def build_fallback_chain(initial_strategy)
        selector = StrategySelector.new(@adapter, @signature_class)
        all_strategies = selector.available_strategies.sort_by(&:priority).reverse

        [initial_strategy, *all_strategies.reject { |s| s.name == initial_strategy.name }]
      end

      # Different strategies get different retry counts
      sig { params(strategy: Strategies::BaseStrategy).returns(Integer) }
      def max_retries_for_strategy(strategy)
        case strategy.name
        when *STRUCTURED_OUTPUT_STRATEGIES
          1 # Structured outputs rarely benefit from retries, most errors are permanent
        when "anthropic_extraction"
          2 # Anthropic can be a bit more variable
        else
          MAX_RETRIES # Enhanced prompting might need more attempts
        end
      end

      # Calculate exponential backoff with up to 10% jitter, capped at
      # MAX_BACKOFF seconds. +attempt+ is the 1-based retry number.
      sig { params(attempt: Integer).returns(Float) }
      def calculate_backoff(attempt)
        base_delay = BACKOFF_BASE * (2 ** (attempt - 1))
        jitter = rand * 0.1 * base_delay

        [base_delay + jitter, MAX_BACKOFF].min
      end

      # Log an upcoming retry. Structured-output strategies log at debug level
      # since they often have expected failures; everything else warns.
      sig do
        params(
          strategy: Strategies::BaseStrategy,
          retry_count: Integer,
          retry_limit: Integer,
          error: StandardError
        ).void
      end
      def log_retry(strategy, retry_count, retry_limit, error)
        level = STRUCTURED_OUTPUT_STRATEGIES.include?(strategy.name) ? :debug : :warn
        DSPy.logger.public_send(
          level,
          "Retrying #{strategy.name} after error (attempt #{retry_count}/#{retry_limit}): #{error.message}"
        )
      end

      # Wait +seconds+ before the next retry.
      #
      # Uses the current Async task when there is one (as before). Falls back
      # to Kernel#sleep when called outside a task, where Async::Task.current
      # would raise from inside the rescue clause and abort the fallback chain.
      sig { params(seconds: Float).void }
      def pause(seconds)
        return unless seconds.positive?

        task = Async::Task.current?
        if task
          task.sleep(seconds)
        else
          sleep(seconds)
        end
      end
    end
  end
end
@@ -1,78 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative "base_strategy"
4
-
5
module DSPy
  class LM
    module Strategies
      # Strategy for using Anthropic's enhanced JSON extraction patterns
      class AnthropicExtractionStrategy < BaseStrategy
        extend T::Sig

        # Available whenever the adapter is the Anthropic adapter.
        sig { override.returns(T::Boolean) }
        def available?
          adapter.is_a?(DSPy::LM::AnthropicAdapter)
        end

        sig { override.returns(Integer) }
        def priority
          90 # High priority - Anthropic's extraction is very reliable
        end

        sig { override.returns(String) }
        def name
          "anthropic_extraction"
        end

        # Intentionally a no-op for this strategy.
        sig { override.params(messages: T::Array[T::Hash[Symbol, String]], request_params: T::Hash[Symbol, T.untyped]).void }
        def prepare_request(messages, request_params)
          # Anthropic adapter already handles JSON optimization in prepare_messages_for_json
          # No additional preparation needed here
        end

        # Returns the JSON text found in the response content, or nil.
        # Delegates to the adapter's own extractor when it offers one.
        sig { override.params(response: DSPy::LM::Response).returns(T.nilable(String)) }
        def extract_json(response)
          if adapter.respond_to?(:extract_json_from_response)
            adapter.extract_json_from_response(response.content)
          else
            extract_json_fallback(response.content)
          end
        end

        private

        # Basic extraction used when the adapter has no extractor of its own.
        #
        # Tries four patterns in order and returns the first hit (stripped):
        #   1. a ```json fenced block (the last one in the content; the result
        #      may be an empty string when the fence has no body)
        #   2. the first fenced block after a "## Output values" header
        #   3. the first generic fenced block, if it starts with { or [
        #   4. the whole content, if it starts with { or [
        # Returns nil for nil content or when nothing matches.
        #
        # String#rpartition/#partition are used instead of split(...).last so
        # that a fence or header sitting at the very end of the content (for
        # example a truncated response) no longer raises NoMethodError or
        # returns the text *before* the fence.
        sig { params(content: T.nilable(String)).returns(T.nilable(String)) }
        def extract_json_fallback(content)
          return nil if content.nil?

          # Pattern 1: ```json blocks
          if content.include?('```json')
            after_fence = content.rpartition('```json').last
            return after_fence.partition('```').first.strip
          end

          # Pattern 2: ## Output values header
          if content.include?('## Output values')
            section = content.rpartition('## Output values').last
            fenced = section.split('```')[1]
            return fenced.strip if fenced
          end

          # Pattern 3: Generic code blocks
          if content.include?('```')
            code_block = content.split('```')[1]&.strip
            return code_block if code_block&.start_with?('{', '[')
          end

          # Pattern 4: Already valid JSON
          stripped = content.strip
          stripped if stripped.start_with?('{', '[')
        end
      end
    end
  end
end
@@ -1,132 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "sorbet-runtime"
4
-
5
module DSPy
  class LM
    module Strategies
      # Strategy for using Anthropic's tool use feature for guaranteed JSON output
      class AnthropicToolUseStrategy < BaseStrategy
        extend T::Sig

        # Name of the synthetic tool the model is forced to call.
        TOOL_NAME = "json_output"

        # Text appended to the last user message to request tool use.
        TOOL_INSTRUCTION = "\n\nPlease use the json_output tool to provide your response."

        # Model ids this strategy treats as tool-capable: anything containing
        # "claude-3" (which already covers 3.5/3.7 ids) plus the
        # "claude-<family>-<major>" naming used from Claude 4 onwards.
        # NOTE(review): assumes adapter.model is the provider's model id string.
        TOOL_CAPABLE_MODEL = /claude-(?:3|(?:opus|sonnet|haiku)-\d)/.freeze

        sig { override.returns(T::Boolean) }
        def available?
          # Only available for Anthropic adapters with models that support tool use
          adapter.is_a?(DSPy::LM::AnthropicAdapter) && supports_tool_use?
        end

        sig { override.returns(Integer) }
        def priority
          95 # Higher priority than extraction strategy - tool use is more reliable
        end

        sig { override.returns(String) }
        def name
          "anthropic_tool_use"
        end

        # Adds the json_output tool to +request_params+, forces the model to
        # call it, and asks for it in the last message when that is a user
        # message. Both arguments are mutated in place.
        #
        # The instruction is appended at most once, so calling this again with
        # the same messages (e.g. on a retry) does not stack duplicates.
        sig { override.params(messages: T::Array[T::Hash[Symbol, String]], request_params: T::Hash[Symbol, T.untyped]).void }
        def prepare_request(messages, request_params)
          request_params[:tools] = [convert_to_tool_schema]
          request_params[:tool_choice] = { type: "tool", name: TOOL_NAME }

          last_message = messages.last
          return unless last_message && last_message[:role] == "user"
          return if last_message[:content].end_with?(TOOL_INSTRUCTION)

          last_message[:content] += TOOL_INSTRUCTION
        end

        # Returns the tool input serialized as JSON, or nil when the response
        # carries no usable json_output call. Any error raised while inspecting
        # the response is logged at debug level and turned into nil.
        sig { override.params(response: DSPy::LM::Response).returns(T.nilable(String)) }
        def extract_json(response)
          json_from_tool_calls(response) || json_from_tool_use_markup(response.content)
        rescue => e
          DSPy.logger.debug("Failed to extract tool use JSON: #{e.message}")
          nil
        end

        # Returns true when the error should skip retries and move on to the
        # next strategy, false to let the retry handler deal with it.
        sig { override.params(error: StandardError).returns(T::Boolean) }
        def handle_error(error)
          message = error.message
          return false unless message.include?("tool") || message.include?("invalid_request_error")

          DSPy.logger.warn("Anthropic tool use failed: #{message}")
          true
        end

        private

        # Primary path: tool calls exposed on the response metadata. Looks for
        # the json_output call anywhere in the list rather than only first.
        sig { params(response: DSPy::LM::Response).returns(T.nilable(String)) }
        def json_from_tool_calls(response)
          metadata = response.metadata
          return nil unless metadata.respond_to?(:tool_calls)

          tool_calls = metadata.tool_calls
          return nil unless tool_calls.is_a?(Array)

          call = tool_calls.find { |candidate| candidate[:name] == TOOL_NAME && candidate[:input] }
          call && JSON.generate(call[:input])
        end

        # Fallback path: <tool_use>...<input>JSON</input>...</tool_use> markup
        # embedded in the text content.
        sig { params(content: T.nilable(String)).returns(T.nilable(String)) }
        def json_from_tool_use_markup(content)
          return nil if content.nil? || content.empty?
          return nil unless content.include?("<tool_use>")

          tool_block = content[/<tool_use>.*?<\/tool_use>/m]
          return nil unless tool_block

          tool_block[/<input>(.*?)<\/input>/m, 1]&.strip
        end

        # Whether the adapter's model id looks like a tool-capable Claude model.
        sig { returns(T::Boolean) }
        def supports_tool_use?
          adapter.model.downcase.match?(TOOL_CAPABLE_MODEL)
        end

        # Converts the signature's output fields to an Anthropic tool
        # definition in which every output field is required.
        sig { returns(T::Hash[Symbol, T.untyped]) }
        def convert_to_tool_schema
          output_fields = signature_class.output_field_descriptors

          {
            name: TOOL_NAME,
            description: "Output the result in the required JSON format",
            input_schema: {
              type: "object",
              properties: build_properties_from_fields(output_fields),
              required: output_fields.keys.map(&:to_s)
            }
          }
        end

        # Maps each field name (as a String) to the JSON schema of its type.
        sig { params(fields: T::Hash[Symbol, T.untyped]).returns(T::Hash[String, T.untyped]) }
        def build_properties_from_fields(fields)
          fields.to_h do |field_name, descriptor|
            [field_name.to_s, DSPy::TypeSystem::SorbetJsonSchema.type_to_json_schema(descriptor.type)]
          end
        end
      end
    end
  end
end
@@ -1,53 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "sorbet-runtime"
4
-
5
module DSPy
  class LM
    module Strategies
      # Abstract contract shared by every JSON extraction strategy.
      #
      # A strategy is built around an adapter and a signature class; subclasses
      # reach both through the protected readers declared at the bottom.
      class BaseStrategy
        extend T::Sig
        extend T::Helpers
        abstract!

        sig { params(adapter: DSPy::LM::Adapter, signature_class: T.class_of(DSPy::Signature)).void }
        def initialize(adapter, signature_class)
          @signature_class = signature_class
          @adapter = adapter
        end

        # Whether the strategy can be used with the given adapter/model.
        sig { abstract.returns(T::Boolean) }
        def available?
        end

        # Ranking of the strategy; a higher number means more preferred.
        sig { abstract.returns(Integer) }
        def priority
        end

        # Identifier of the strategy, used for logging/debugging.
        sig { abstract.returns(String) }
        def name
        end

        # Get the outgoing request ready for JSON extraction.
        sig { abstract.params(messages: T::Array[T::Hash[Symbol, String]], request_params: T::Hash[Symbol, T.untyped]).void }
        def prepare_request(messages, request_params)
        end

        # Pull the JSON text out of the response; nil is an allowed result.
        sig { abstract.params(response: DSPy::LM::Response).returns(T.nilable(String)) }
        def extract_json(response)
        end

        # Hook for strategy-specific error handling. The base implementation
        # never handles anything, so it always answers false and the error is
        # left to propagate.
        sig { params(error: StandardError).returns(T::Boolean) }
        def handle_error(error)
          false
        end

        protected

        attr_reader :adapter
        attr_reader :signature_class
      end
    end
  end
end
@@ -1,178 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative "base_strategy"
4
-
5
module DSPy
  class LM
    module Strategies
      # Enhanced prompting strategy that works with any LLM
      # Adds explicit JSON formatting instructions to improve reliability
      class EnhancedPromptingStrategy < BaseStrategy
        extend T::Sig

        # First line of the instruction block added to the user prompt; also
        # used to detect a prompt that has already been enhanced.
        JSON_INSTRUCTIONS_MARKER = "IMPORTANT: You must respond with valid JSON that matches this structure:"

        # Greedy match for an object or array embedded in free text.
        JSON_STRUCTURE = /\{[\s\S]*\}|\[[\s\S]*\]/.freeze

        sig { override.returns(T::Boolean) }
        def available?
          # This strategy is always available as a fallback
          true
        end

        sig { override.returns(Integer) }
        def priority
          50 # Medium priority - use when native methods aren't available
        end

        sig { override.returns(String) }
        def name
          "enhanced_prompting"
        end

        # Appends explicit JSON instructions to the last user message and
        # prepends a system message when none exists. Mutates +messages+.
        #
        # A user message that already carries the instructions is left alone,
        # so calling this again with the same messages (e.g. on a retry) does
        # not stack the instruction block.
        sig { override.params(messages: T::Array[T::Hash[Symbol, String]], request_params: T::Hash[Symbol, T.untyped]).void }
        def prepare_request(messages, request_params)
          return if messages.empty?

          last_user_idx = messages.rindex { |msg| msg[:role] == "user" }
          return unless last_user_idx

          user_message = messages[last_user_idx]
          unless user_message[:content].include?(JSON_INSTRUCTIONS_MARKER)
            user_message[:content] = enhance_prompt_with_json_instructions(
              user_message[:content],
              signature_class.output_json_schema
            )
          end

          # Add system instructions if no system message exists
          return if messages.any? { |msg| msg[:role] == "system" }

          messages.unshift({
            role: "system",
            content: "You are a helpful assistant that always responds with valid JSON when requested."
          })
        end

        # Returns the first candidate that parses as JSON, trying in order:
        # a fenced code block, the whole (stripped) content, and finally the
        # widest {...} or [...] span inside the content. nil when none parses.
        sig { override.params(response: DSPy::LM::Response).returns(T.nilable(String)) }
        def extract_json(response)
          return nil if response.content.nil?

          content = response.content.strip
          candidates = [fenced_candidate(content), content, content[JSON_STRUCTURE]]
          candidates.find { |candidate| candidate && valid_json?(candidate) }
        end

        private

        # Body of the last ```json block, else of the first generic fenced
        # block, else nil. partition-based so that a fence with nothing after
        # it yields an empty/nil candidate instead of raising NoMethodError.
        sig { params(content: String).returns(T.nilable(String)) }
        def fenced_candidate(content)
          if content.include?('```json')
            content.rpartition('```json').last.partition('```').first.strip
          elsif content.include?('```')
            content.split('```')[1]&.strip
          end
        end

        # Returns +prompt+ followed by an example document generated from
        # +schema+, the list of required fields, and formatting rules.
        sig { params(prompt: String, schema: T::Hash[Symbol, T.untyped]).returns(String) }
        def enhance_prompt_with_json_instructions(prompt, schema)
          json_example = generate_example_from_schema(schema)

          <<~ENHANCED
            #{prompt}

            IMPORTANT: You must respond with valid JSON that matches this structure:
            ```json
            #{JSON.pretty_generate(json_example)}
            ```

            Required fields: #{schema[:required]&.join(', ') || 'none'}

            Ensure your response:
            1. Is valid JSON (properly quoted strings, no trailing commas)
            2. Includes all required fields
            3. Uses the correct data types for each field
            4. Is wrapped in ```json``` markdown code blocks
          ENHANCED
        end

        # Example document for the top-level schema; {} without :properties.
        sig { params(schema: T::Hash[Symbol, T.untyped]).returns(T::Hash[String, T.untyped]) }
        def generate_example_from_schema(schema)
          properties = schema[:properties]
          return {} unless properties

          example_object(properties)
        end

        # One example value per property, keyed by the property name as String.
        sig { params(properties: T::Hash[T.untyped, T.untyped]).returns(T::Hash[String, T.untyped]) }
        def example_object(properties)
          properties.to_h do |prop_name, prop_schema|
            [prop_name.to_s, generate_example_value(prop_schema)]
          end
        end

        # Example value for a single field schema, chosen by its :type.
        #
        # Union types such as ["integer", "null"] are reduced to one concrete
        # type first (see #primary_type), so the example has the type the
        # model is expected to produce rather than a generic string. For a
        # string member this means the field's :description is used when set,
        # exactly as for a plain "string" field.
        sig { params(field_schema: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
        def generate_example_value(field_schema)
          type = field_schema[:type]
          type = primary_type(type, field_schema) if type.is_a?(Array)

          case type
          when "string"
            field_schema[:description] || "example string"
          when "integer"
            42
          when "number"
            3.14
          when "boolean"
            true
          when "array"
            items = field_schema[:items]
            items ? [generate_example_value(items)] : ["example item"]
          when "object"
            properties = field_schema[:properties]
            properties ? example_object(properties) : { "nested" => "object" }
          else
            "example value"
          end
        end

        # Picks the member of a union type to illustrate: an object with
        # declared properties first, then string, then the first non-null
        # member. nil when the union holds nothing but "null".
        sig { params(types: T::Array[T.untyped], field_schema: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
        def primary_type(types, field_schema)
          return "object" if types.include?("object") && field_schema[:properties]
          return "string" if types.include?("string")

          types.find { |member| member != "null" }
        end

        # True when +content+ parses as JSON.
        sig { params(content: String).returns(T::Boolean) }
        def valid_json?(content)
          JSON.parse(content)
          true
        rescue JSON::ParserError
          false
        end
      end
    end
  end
end
@@ -1,80 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative "base_strategy"
4
- require_relative "../adapters/gemini/schema_converter"
5
-
6
module DSPy
  class LM
    module Strategies
      # Strategy built on Gemini's native structured output feature
      class GeminiStructuredOutputStrategy < BaseStrategy
        extend T::Sig

        # Usable only with a Gemini adapter that has structured outputs
        # switched on and a model the schema converter reports as supported.
        sig { override.returns(T::Boolean) }
        def available?
          gemini_enabled = adapter.is_a?(DSPy::LM::GeminiAdapter) &&
                           adapter.instance_variable_get(:@structured_outputs_enabled)
          return false unless gemini_enabled

          DSPy::LM::Adapters::Gemini::SchemaConverter.supports_structured_outputs?(adapter.model)
        end

        sig { override.returns(Integer) }
        def priority
          # Top of the ranking: native structured outputs are most reliable
          100
        end

        sig { override.returns(String) }
        def name
          "gemini_structured_output"
        end

        # Stores a generation_config in +request_params+ asking for an
        # application/json response that follows the signature's schema.
        sig { override.params(messages: T::Array[T::Hash[Symbol, String]], request_params: T::Hash[Symbol, T.untyped]).void }
        def prepare_request(messages, request_params)
          # JSON Schema format is used so unions (oneOf/anyOf) are supported
          json_schema = DSPy::LM::Adapters::Gemini::SchemaConverter.to_gemini_format(signature_class)

          generation_config = {
            response_mime_type: "application/json",
            response_json_schema: json_schema
          }
          request_params[:generation_config] = generation_config
        end

        # The structured-output response is expected to be JSON already, so
        # the content is handed back untouched.
        sig { override.params(response: DSPy::LM::Response).returns(T.nilable(String)) }
        def extract_json(response)
          response.content
        end

        # Answers true for errors whose (case-insensitive) message contains one
        # of the known permanent-failure fragments, which tells the caller to
        # skip retries and move to the next strategy. Anything else is false.
        sig { override.params(error: StandardError).returns(T::Boolean) }
        def handle_error(error)
          lowered = error.message.to_s.downcase

          permanent = [
            "schema",
            "generation_config",
            "response_schema",
            'unknown name "response_mime_type"',
            'unknown name "response_schema"',
            "invalid json payload",
            "no matching sse interaction found", # VCR test configuration issue
            "cannot find field"
          ].any? { |fragment| lowered.include?(fragment) }

          # Unknown error - leave it to the regular retry logic
          return false unless permanent

          DSPy.logger.debug("Gemini structured output failed (permanent error, skipping retries): #{error.message}")
          true
        end
      end
    end
  end
end