dspy 0.27.1 → 0.27.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 7962564b749798c462b3202e16153aa9e508e8d7fb10d6618c76e740ecb171d1
-  data.tar.gz: d81fad4637c82967745d644c291f7436e26c0c1bf06785ff56028c71d4d5f26d
+  metadata.gz: 07ebaf2db9b6279275ac4dd61ac4f994ed5415561b0063310f006db084306092
+  data.tar.gz: 8e8c1cbfcb36d9ffa4bfd68f62317fba7d5d32a6b1409ce43749f555a719fc97
 SHA512:
-  metadata.gz: 38ed78b62481f7267a9d1fa7157af436bdfd82d6a8281b00b9d8915fa74e7b0b5d896f8dfa9d6f4a91aa829973f7db94445d76b3cdb5e7c66800cd23f28f1fe9
-  data.tar.gz: 61f0ef47790c3c9aa6c6ecd1f7ee11268ac81b27ca75212af2c69b4f4b077fa07c9acb237c7d7b3f4174c85d7de415684ac9fef5c88e01d538671b93910fa889
+  metadata.gz: 8f19c7e01b5e03743457c9784b8cca05ef42e8002ff210dd59f08915f11ceb7c1c000f8b0eac6cd102c847f069194923a46c5197473ea53cda7616921ea1da26
+  data.tar.gz: 9ed8e5ec4e08eb83f17dbea912a7a14b28212968f36dc7adf6cad7dd7e62de8506e371e99bb4f8f1d785847b45783fdfada53dae3d1d039a4f8695aba54984b4
@@ -83,46 +83,38 @@ module DSPy
     sig { returns(T.class_of(DSPy::Signature)) }
     attr_reader :original_signature
 
-    # Override forward_untyped to add ChainOfThought-specific analysis and tracing
+    # Override forward_untyped to add ChainOfThought-specific analysis
+    # Let Module#forward handle the ChainOfThought span creation automatically
     sig { override.params(input_values: T.untyped).returns(T.untyped) }
     def forward_untyped(**input_values)
-      # Wrap in chain-specific span tracking (overrides parent's span attributes)
-      DSPy::Context.with_span(
-        operation: "#{self.class.name}.forward",
-        'langfuse.observation.type' => 'span', # Use 'span' for proper timing
-        'langfuse.observation.input' => input_values.to_json,
-        'dspy.module' => 'ChainOfThought',
-        'dspy.module_type' => 'chain_of_thought', # Semantic identifier
-        'dspy.signature' => @original_signature.name
-      ) do |span|
-        # Call parent prediction logic (which will create its own nested span)
-        prediction_result = super(**input_values)
-
-        # Enhance span with reasoning data
-        if span && prediction_result
-          # Include reasoning in output for chain observation
-          output_with_reasoning = if prediction_result.respond_to?(:reasoning) && prediction_result.reasoning
-            output_hash = prediction_result.respond_to?(:to_h) ? prediction_result.to_h : {}
-            output_hash.merge(reasoning: prediction_result.reasoning)
-          else
-            prediction_result.respond_to?(:to_h) ? prediction_result.to_h : prediction_result.to_s
-          end
-
-          span.set_attribute('langfuse.observation.output', DSPy::Utils::Serialization.to_json(output_with_reasoning))
-
-          # Add reasoning metrics
-          if prediction_result.respond_to?(:reasoning) && prediction_result.reasoning
-            span.set_attribute('cot.reasoning_length', prediction_result.reasoning.length)
-            span.set_attribute('cot.has_reasoning', true)
-            span.set_attribute('cot.reasoning_steps', count_reasoning_steps(prediction_result.reasoning))
-          end
+      # Create a Predict instance and call its forward method (which will create Predict span via Module#forward)
+      # We can't call super.forward because that would go to Module#forward_untyped, not Module#forward
+
+      # Create a temporary Predict instance with our enhanced signature to get the prediction
+      predict_instance = DSPy::Predict.new(@signature_class)
+      predict_instance.config.lm = self.lm # Use the same LM configuration
+
+      # Call predict's forward method, which will create the Predict span
+      prediction_result = predict_instance.forward(**input_values)
+
+      # Add ChainOfThought-specific analysis and events
+      if DSPy::Observability.enabled? && prediction_result
+        # Add reasoning metrics via events
+        if prediction_result.respond_to?(:reasoning) && prediction_result.reasoning
+          DSPy.event('chain_of_thought.reasoning_metrics', {
+            'cot.reasoning_length' => prediction_result.reasoning.length,
+            'cot.has_reasoning' => true,
+            'cot.reasoning_steps' => count_reasoning_steps(prediction_result.reasoning),
+            'dspy.module_type' => 'chain_of_thought',
+            'dspy.signature' => @original_signature.name
+          })
         end
-
-        # Analyze reasoning (emits events for backwards compatibility)
-        analyze_reasoning(prediction_result)
-
-        prediction_result
       end
+
+      # Analyze reasoning (emits events for backwards compatibility)
+      analyze_reasoning(prediction_result)
+
+      prediction_result
     end
 
     private
@@ -204,7 +196,7 @@ module DSPy
     # Emits reasoning analysis instrumentation event
     sig { params(reasoning_content: String).void }
     def emit_reasoning_analysis(reasoning_content)
-      DSPy.log('chain_of_thought.reasoning_complete', **{
+      DSPy.event('chain_of_thought.reasoning_complete', {
         'dspy.signature' => @original_signature.name,
         'cot.reasoning_steps' => count_reasoning_steps(reasoning_content),
        'cot.reasoning_length' => reasoning_content.length,
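
Illustrative sketch (not part of the diff; the QA signature and its question/reasoning fields are hypothetical): the caller-facing API is unchanged in 0.27.2. ChainOfThought now delegates the prediction to an internal DSPy::Predict instance, and the reasoning metrics travel as a 'chain_of_thought.reasoning_metrics' event instead of span attributes.

cot = DSPy::ChainOfThought.new(QA)                 # QA: assumed DSPy::Signature subclass
result = cot.forward(question: "What is 2 + 2?")   # input field name is illustrative
result.reasoning                                   # reasoning text, when the model returns one
# 'cot.reasoning_length', 'cot.reasoning_steps', etc. now arrive via DSPy.event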
data/lib/dspy/code_act.rb CHANGED
@@ -355,7 +355,7 @@ module DSPy
 
     sig { params(iteration: Integer, thought: String, ruby_code: String, execution_result: T.nilable(String), error_message: T.nilable(String)).void }
     def emit_iteration_complete_event(iteration, thought, ruby_code, execution_result, error_message)
-      DSPy.log('codeact.iteration_complete', **{
+      DSPy.event('codeact.iteration_complete', {
         'codeact.iteration' => iteration,
         'codeact.thought' => thought,
         'codeact.ruby_code' => ruby_code,
@@ -368,7 +368,7 @@ module DSPy
     sig { params(iterations_count: Integer, final_answer: T.nilable(String), history: T::Array[CodeActHistoryEntry]).void }
     def handle_max_iterations_if_needed(iterations_count, final_answer, history)
       if iterations_count >= @max_iterations && final_answer.nil?
-        DSPy.log('codeact.max_iterations', **{
+        DSPy.event('codeact.max_iterations', {
           'codeact.iteration_count' => iterations_count,
           'codeact.max_iterations' => @max_iterations,
           'codeact.final_history_length' => history.length
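
The two code_act.rb hunks above are part of the same DSPy.log → DSPy.event migration seen in chain_of_thought.rb. A minimal before/after sketch of the call shape, reusing a payload key from the hunk above:

# 0.27.1: event name plus a splatted keyword-argument payload
DSPy.log('codeact.max_iterations', **{ 'codeact.iteration_count' => 5 })

# 0.27.2: event name plus a single Hash payload
DSPy.event('codeact.max_iterations', { 'codeact.iteration_count' => 5 })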
data/lib/dspy/context.rb CHANGED
@@ -6,31 +6,41 @@ module DSPy
   class Context
     class << self
       def current
-        # Use a combination of Thread and Fiber storage for proper context management
-        # Thread storage ensures thread isolation
-        # Fiber storage ensures OpenTelemetry context propagation
+        # Use Thread storage as primary source to ensure thread isolation
+        # Fiber storage is used for OpenTelemetry context propagation within the same thread
 
         # Create a unique key for this thread to ensure isolation
         thread_key = :"dspy_context_#{Thread.current.object_id}"
 
-        # Check if this thread has its own context
+        # Always check thread-local storage first for proper isolation
         if Thread.current[thread_key]
-          # Thread has context, ensure fiber has it too for OpenTelemetry
-          Fiber[:dspy_context] ||= Thread.current[thread_key]
-        else
-          # No context for this thread - create new one
-          context = {
-            trace_id: SecureRandom.uuid,
-            span_stack: []
-          }
-          # Set in both Thread and Fiber storage
-          Thread.current[thread_key] = context
-          Thread.current[:dspy_context] = context # Keep for backward compatibility
-          Fiber[:dspy_context] = context
+          # Thread has context, ensure fiber inherits it for OpenTelemetry propagation
+          Fiber[:dspy_context] = Thread.current[thread_key]
+          Thread.current[:dspy_context] = Thread.current[thread_key] # Keep for backward compatibility
+          return Thread.current[thread_key]
         end
 
-        # Return the context (from Fiber storage for OpenTelemetry compatibility)
-        Fiber[:dspy_context]
+        # Check if current fiber has context that was set by this same thread
+        # This handles cases where context was set via OpenTelemetry propagation within the thread
+        if Fiber[:dspy_context] && Thread.current[:dspy_context] == Fiber[:dspy_context]
+          # This fiber context was set by this thread, safe to use
+          Thread.current[thread_key] = Fiber[:dspy_context]
+          return Fiber[:dspy_context]
+        end
+
+        # No existing context or context belongs to different thread - create new one
+        context = {
+          trace_id: SecureRandom.uuid,
+          span_stack: [],
+          otel_span_stack: []
+        }
+
+        # Set in both Thread and Fiber storage
+        Thread.current[thread_key] = context
+        Thread.current[:dspy_context] = context # Keep for backward compatibility
+        Fiber[:dspy_context] = context
+
+        context
       end
 
       def with_span(operation:, **attributes)
@@ -67,23 +77,66 @@ module DSPy
           # Record start time for explicit duration tracking
           otel_start_time = Time.now
 
-          # Always use in_span which properly manages context internally
-          DSPy::Observability.tracer.in_span(
-            operation,
-            attributes: span_attributes,
-            kind: :internal
-          ) do |span|
-            result = yield(span)
-
-            # Add explicit timing information to help Langfuse
-            if span
-              duration_ms = ((Time.now - otel_start_time) * 1000).round(3)
-              span.set_attribute('duration.ms', duration_ms)
-              span.set_attribute('langfuse.observation.startTime', otel_start_time.iso8601(3))
-              span.set_attribute('langfuse.observation.endTime', Time.now.iso8601(3))
+          # Get parent OpenTelemetry span for proper context propagation
+          parent_otel_span = current[:otel_span_stack].last
+
+          # Create span with proper parent context
+          if parent_otel_span
+            # Use the parent span's context to ensure proper nesting
+            OpenTelemetry::Trace.with_span(parent_otel_span) do
+              DSPy::Observability.tracer.in_span(
+                operation,
+                attributes: span_attributes,
+                kind: :internal
+              ) do |span|
+                # Add to our OpenTelemetry span stack
+                current[:otel_span_stack].push(span)
+
+                begin
+                  result = yield(span)
+
+                  # Add explicit timing information to help Langfuse
+                  if span
+                    duration_ms = ((Time.now - otel_start_time) * 1000).round(3)
+                    span.set_attribute('duration.ms', duration_ms)
+                    span.set_attribute('langfuse.observation.startTime', otel_start_time.iso8601(3))
+                    span.set_attribute('langfuse.observation.endTime', Time.now.iso8601(3))
+                  end
+
+                  result
+                ensure
+                  # Remove from our OpenTelemetry span stack
+                  current[:otel_span_stack].pop
+                end
+              end
+            end
+          else
+            # Root span - no parent context needed
+            DSPy::Observability.tracer.in_span(
+              operation,
+              attributes: span_attributes,
+              kind: :internal
+            ) do |span|
+              # Add to our OpenTelemetry span stack
+              current[:otel_span_stack].push(span)
+
+              begin
+                result = yield(span)
+
+                # Add explicit timing information to help Langfuse
+                if span
+                  duration_ms = ((Time.now - otel_start_time) * 1000).round(3)
+                  span.set_attribute('duration.ms', duration_ms)
+                  span.set_attribute('langfuse.observation.startTime', otel_start_time.iso8601(3))
+                  span.set_attribute('langfuse.observation.endTime', Time.now.iso8601(3))
+                end
+
+                result
+              ensure
+                # Remove from our OpenTelemetry span stack
+                current[:otel_span_stack].pop
+              end
             end
-
-            result
           end
         else
           yield(nil)
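
Illustrative sketch (not part of the diff): DSPy::Context.current is keyed per thread, so concurrent threads build separate contexts, and each context now also carries the otel_span_stack that with_span pushes to and pops from when observability is enabled.

trace_ids = Array.new(2) do
  Thread.new { DSPy::Context.current[:trace_id] }
end.map(&:value)

trace_ids.uniq.length                   # => 2, each thread gets its own trace id
DSPy::Context.current[:otel_span_stack] # => [] outside of any with_span block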
data/lib/dspy/errors.rb CHANGED
@@ -7,6 +7,8 @@ module DSPy
 
   class DeserializationError < Error; end
 
+  class UnsupportedSchemaError < Error; end
+
   class ConfigurationError < Error
     def self.missing_lm(module_name)
       new(<<~MESSAGE)
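
A short illustrative sketch (not part of the diff): the new error subclasses DSPy::Error, so existing blanket rescues keep catching it while callers can also rescue it specifically.

begin
  raise DSPy::UnsupportedSchemaError, "oneOf without discriminator fields"
rescue DSPy::Error => e
  e.class # => DSPy::UnsupportedSchemaError
end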
@@ -1,7 +1,6 @@
 # frozen_string_literal: true
 
 require "sorbet-runtime"
-require_relative "../../cache_manager"
 
 module DSPy
   class LM
@@ -11,57 +10,43 @@ module DSPy
     class SchemaConverter
       extend T::Sig
 
-      # Models that support structured outputs
+      # Models that support structured outputs (JSON + Schema)
+      # Based on official Google documentation and gemini-ai gem table
       STRUCTURED_OUTPUT_MODELS = T.let([
-        "gemini-1.5-pro",
-        "gemini-1.5-flash",
-        "gemini-2.0-flash-exp"
+        "gemini-1.5-pro", # ✅ Full schema support (legacy)
+        "gemini-1.5-pro-preview-0514", # ✅ Full schema support (legacy)
+        "gemini-1.5-pro-preview-0409", # ✅ Full schema support (legacy)
+        "gemini-2.5-flash", # ✅ Full schema support (2025 current)
+        "gemini-2.5-flash-lite" # ✅ Full schema support (2025 current)
+      ].freeze, T::Array[String])
+
+      # Models that support JSON mode but NOT schema
+      JSON_ONLY_MODELS = T.let([
+        "gemini-pro", # 🟡 JSON only, no schema
+        "gemini-1.5-flash", # 🟡 JSON only, no schema (legacy)
+        "gemini-1.5-flash-preview-0514", # 🟡 JSON only, no schema (legacy)
+        "gemini-1.0-pro-002", # 🟡 JSON only, no schema
+        "gemini-1.0-pro", # 🟡 JSON only, no schema
+        "gemini-2.0-flash-001", # 🟡 JSON only, no schema (2025)
+        "gemini-2.0-flash-lite-001" # 🟡 JSON only, no schema (2025)
       ].freeze, T::Array[String])
 
       sig { params(signature_class: T.class_of(DSPy::Signature)).returns(T::Hash[Symbol, T.untyped]) }
       def self.to_gemini_format(signature_class)
-        # Check cache first
-        cache_manager = DSPy::LM.cache_manager
-        cached_schema = cache_manager.get_schema(signature_class, "gemini", {})
-
-        if cached_schema
-          DSPy.logger.debug("Using cached schema for #{signature_class.name}")
-          return cached_schema
-        end
-
         # Get the output JSON schema from the signature class
         output_schema = signature_class.output_json_schema
 
         # Convert to Gemini format (OpenAPI 3.0 Schema subset - not related to OpenAI)
-        gemini_schema = convert_dspy_schema_to_gemini(output_schema)
-
-        # Cache the result
-        cache_manager.cache_schema(signature_class, "gemini", gemini_schema, {})
-
-        gemini_schema
+        convert_dspy_schema_to_gemini(output_schema)
       end
 
       sig { params(model: String).returns(T::Boolean) }
       def self.supports_structured_outputs?(model)
-        # Check cache first
-        cache_manager = DSPy::LM.cache_manager
-        cached_result = cache_manager.get_capability(model, "structured_outputs")
-
-        if !cached_result.nil?
-          DSPy.logger.debug("Using cached capability check for #{model}")
-          return cached_result
-        end
-
         # Extract base model name without provider prefix
         base_model = model.sub(/^gemini\//, "")
 
         # Check if it's a supported model or a newer version
-        result = STRUCTURED_OUTPUT_MODELS.any? { |supported| base_model.start_with?(supported) }
-
-        # Cache the result
-        cache_manager.cache_capability(model, "structured_outputs", result)
-
-        result
+        STRUCTURED_OUTPUT_MODELS.any? { |supported| base_model.start_with?(supported) }
       end
 
       sig { params(schema: T::Hash[Symbol, T.untyped]).returns(T::Array[String]) }
@@ -81,6 +66,8 @@ module DSPy
 
       sig { params(dspy_schema: T::Hash[Symbol, T.untyped]).returns(T::Hash[Symbol, T.untyped]) }
       def self.convert_dspy_schema_to_gemini(dspy_schema)
+        # For Gemini's responseJsonSchema, we need pure JSON Schema format
+        # Remove OpenAPI-specific fields like "$schema"
         result = {
           type: "object",
           properties: {},
@@ -101,6 +88,21 @@ module DSPy
 
       sig { params(property_schema: T::Hash[Symbol, T.untyped]).returns(T::Hash[Symbol, T.untyped]) }
       def self.convert_property_to_gemini(property_schema)
+        # Handle oneOf/anyOf schemas (union types) - Gemini supports these in responseJsonSchema
+        if property_schema[:oneOf]
+          return {
+            oneOf: property_schema[:oneOf].map { |schema| convert_property_to_gemini(schema) },
+            description: property_schema[:description]
+          }.compact
+        end
+
+        if property_schema[:anyOf]
+          return {
+            anyOf: property_schema[:anyOf].map { |schema| convert_property_to_gemini(schema) },
+            description: property_schema[:description]
+          }.compact
+        end
+
         case property_schema[:type]
         when "string"
           result = { type: "string" }
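
Sketch of the resulting capability check (illustrative; the full constant path to this Gemini SchemaConverter is not shown in the diff, so the receiver below is abbreviated):

# the "gemini/" prefix is stripped, then matched with start_with? against STRUCTURED_OUTPUT_MODELS
SchemaConverter.supports_structured_outputs?("gemini/gemini-2.5-flash")     # => true  (schema + JSON)
SchemaConverter.supports_structured_outputs?("gemini/gemini-1.5-flash")     # => false (JSON_ONLY_MODELS in 0.27.2)
SchemaConverter.supports_structured_outputs?("gemini/gemini-2.0-flash-001") # => false (JSON mode only)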
@@ -13,14 +13,24 @@ module DSPy
 
         @structured_outputs_enabled = structured_outputs
 
+        # Disable streaming for VCR tests since SSE responses don't record properly
+        @use_streaming = true
+        begin
+          @use_streaming = false if defined?(VCR) && VCR.current_cassette
+        rescue
+          # If VCR is not available or any error occurs, use streaming
+          @use_streaming = true
+        end
+
         @client = Gemini.new(
           credentials: {
             service: 'generative-language-api',
-            api_key: api_key
+            api_key: api_key,
+            version: 'v1beta' # Use beta API version for structured outputs support
           },
           options: {
             model: model,
-            server_sent_events: true
+            server_sent_events: @use_streaming
           }
         )
       end
@@ -43,33 +53,47 @@ module DSPy
         }.merge(extra_params)
 
         begin
-          # Always use streaming
          content = ""
          final_response_data = nil
 
-          @client.stream_generate_content(request_params) do |chunk|
-            # Handle case where chunk might be a string (from SSE VCR)
-            if chunk.is_a?(String)
-              begin
-                chunk = JSON.parse(chunk)
-              rescue JSON::ParserError => e
-                raise AdapterError, "Failed to parse Gemini streaming response: #{e.message}"
+          # Check if we're using streaming or not
+          if @use_streaming
+            # Streaming mode
+            @client.stream_generate_content(request_params) do |chunk|
+              # Handle case where chunk might be a string (from SSE VCR)
+              if chunk.is_a?(String)
+                begin
+                  chunk = JSON.parse(chunk)
+                rescue JSON::ParserError => e
+                  raise AdapterError, "Failed to parse Gemini streaming response: #{e.message}"
+                end
               end
-            end
-
-            # Extract content from chunks
-            if chunk.dig('candidates', 0, 'content', 'parts')
-              chunk_text = extract_text_from_parts(chunk.dig('candidates', 0, 'content', 'parts'))
-              content += chunk_text
 
-              # Call block only if provided (for real streaming)
-              block.call(chunk) if block_given?
+              # Extract content from chunks
+              if chunk.dig('candidates', 0, 'content', 'parts')
+                chunk_text = extract_text_from_parts(chunk.dig('candidates', 0, 'content', 'parts'))
+                content += chunk_text
+
+                # Call block only if provided (for real streaming)
+                block.call(chunk) if block_given?
+              end
+
+              # Store final response data (usage, metadata) from last chunk
+              if chunk['usageMetadata'] || chunk.dig('candidates', 0, 'finishReason')
+                final_response_data = chunk
+              end
             end
+          else
+            # Non-streaming mode (for VCR tests)
+            response = @client.generate_content(request_params)
 
-            # Store final response data (usage, metadata) from last chunk
-            if chunk['usageMetadata'] || chunk.dig('candidates', 0, 'finishReason')
-              final_response_data = chunk
+            # Extract content from single response
+            if response.dig('candidates', 0, 'content', 'parts')
+              content = extract_text_from_parts(response.dig('candidates', 0, 'content', 'parts'))
             end
+
+            # Use response as final data
+            final_response_data = response
           end
 
           # Extract usage information from final chunk
@@ -1,7 +1,6 @@
 # frozen_string_literal: true
 
 require "sorbet-runtime"
-require_relative "../../cache_manager"
 
 module DSPy
   class LM
@@ -22,22 +21,12 @@ module DSPy
 
       sig { params(signature_class: T.class_of(DSPy::Signature), name: T.nilable(String), strict: T::Boolean).returns(T::Hash[Symbol, T.untyped]) }
       def self.to_openai_format(signature_class, name: nil, strict: true)
-        # Build cache params from the method parameters
-        cache_params = { strict: strict }
-        cache_params[:name] = name if name
-
-        # Check cache first
-        cache_manager = DSPy::LM.cache_manager
-        cached_schema = cache_manager.get_schema(signature_class, "openai", cache_params)
-
-        if cached_schema
-          DSPy.logger.debug("Using cached schema for #{signature_class.name}")
-          return cached_schema
-        end
-
         # Get the output JSON schema from the signature class
         output_schema = signature_class.output_json_schema
 
+        # Convert oneOf to anyOf where safe, or raise error for unsupported cases
+        output_schema = convert_oneof_to_anyof_if_safe(output_schema)
+
         # Build the complete schema with OpenAI-specific modifications
         dspy_schema = {
           "$schema": "http://json-schema.org/draft-06/schema#",
@@ -59,7 +48,7 @@ module DSPy
         end
 
         # Wrap in OpenAI's required format
-        result = {
+        {
          type: "json_schema",
          json_schema: {
            name: schema_name,
@@ -67,34 +56,75 @@ module DSPy
             schema: openai_schema
           }
         }
+      end
+
+      # Convert oneOf to anyOf if safe (discriminated unions), otherwise raise error
+      sig { params(schema: T.untyped).returns(T.untyped) }
+      def self.convert_oneof_to_anyof_if_safe(schema)
+        return schema unless schema.is_a?(Hash)
+
+        result = schema.dup
+
+        # Check if this schema has oneOf that we can safely convert
+        if result[:oneOf]
+          if all_have_discriminators?(result[:oneOf])
+            # Safe to convert - discriminators ensure mutual exclusivity
+            result[:anyOf] = result.delete(:oneOf).map { |s| convert_oneof_to_anyof_if_safe(s) }
+          else
+            # Unsafe conversion - raise error
+            raise DSPy::UnsupportedSchemaError.new(
+              "OpenAI structured outputs do not support oneOf schemas without discriminator fields. " \
+              "The schema contains union types that cannot be safely converted to anyOf. " \
+              "Please use enhanced_prompting strategy instead or add discriminator fields to union types."
+            )
+          end
+        end
+
+        # Recursively process nested schemas
+        if result[:properties].is_a?(Hash)
+          result[:properties] = result[:properties].transform_values { |v| convert_oneof_to_anyof_if_safe(v) }
+        end
 
-        # Cache the result with same params
-        cache_manager.cache_schema(signature_class, "openai", result, cache_params)
+        if result[:items].is_a?(Hash)
+          result[:items] = convert_oneof_to_anyof_if_safe(result[:items])
+        end
+
+        # Process arrays of schema items
+        if result[:items].is_a?(Array)
+          result[:items] = result[:items].map { |item|
+            item.is_a?(Hash) ? convert_oneof_to_anyof_if_safe(item) : item
+          }
+        end
+
+        # Process anyOf arrays (in case there are nested oneOf within anyOf)
+        if result[:anyOf].is_a?(Array)
+          result[:anyOf] = result[:anyOf].map { |item|
+            item.is_a?(Hash) ? convert_oneof_to_anyof_if_safe(item) : item
+          }
+        end
 
         result
       end
+
+      # Check if all schemas in a oneOf array have discriminator fields (const properties)
+      sig { params(schemas: T::Array[T.untyped]).returns(T::Boolean) }
+      def self.all_have_discriminators?(schemas)
+        schemas.all? do |schema|
+          next false unless schema.is_a?(Hash)
+          next false unless schema[:properties].is_a?(Hash)
+
+          # Check if any property has a const value (our discriminator pattern)
+          schema[:properties].any? { |_, prop| prop.is_a?(Hash) && prop[:const] }
+        end
+      end
 
       sig { params(model: String).returns(T::Boolean) }
       def self.supports_structured_outputs?(model)
-        # Check cache first
-        cache_manager = DSPy::LM.cache_manager
-        cached_result = cache_manager.get_capability(model, "structured_outputs")
-
-        if !cached_result.nil?
-          DSPy.logger.debug("Using cached capability check for #{model}")
-          return cached_result
-        end
-
         # Extract base model name without provider prefix
         base_model = model.sub(/^openai\//, "")
 
         # Check if it's a supported model or a newer version
-        result = STRUCTURED_OUTPUT_MODELS.any? { |supported| base_model.start_with?(supported) }
-
-        # Cache the result
-        cache_manager.cache_capability(model, "structured_outputs", result)
-
-        result
+        STRUCTURED_OUTPUT_MODELS.any? { |supported| base_model.start_with?(supported) }
       end
 
       sig { params(schema: T::Hash[Symbol, T.untyped]).returns(T::Array[String]) }
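
Sketch of the conversion rule above (hypothetical schema; the receiver is abbreviated since the OpenAI SchemaConverter's full constant path is not shown in this diff): a oneOf whose branches each carry a const discriminator property is rewritten to anyOf, while branches without discriminators raise DSPy::UnsupportedSchemaError, whose message points at the enhanced_prompting strategy as the fallback.

union = {
  oneOf: [
    { type: "object", properties: { kind: { const: "circle" }, radius: { type: "number" } } },
    { type: "object", properties: { kind: { const: "square" }, side: { type: "number" } } }
  ]
}

converted = SchemaConverter.convert_oneof_to_anyof_if_safe(union)
converted.key?(:anyOf) # => true
converted.key?(:oneOf) # => false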
@@ -226,8 +256,8 @@ module DSPy
           end
         end
 
-        # Process oneOf/anyOf/allOf
-        [:oneOf, :anyOf, :allOf].each do |key|
+        # Process anyOf/allOf (oneOf should be converted to anyOf by this point)
+        [:anyOf, :allOf].each do |key|
           if result[key].is_a?(Array)
             result[key] = result[key].map do |sub_schema|
               sub_schema.is_a?(Hash) ? add_additional_properties_recursively(sub_schema) : sub_schema
@@ -272,8 +302,8 @@ module DSPy
           max_depth = [max_depth, items_depth].max
         end
 
-        # Check oneOf/anyOf/allOf
-        [:oneOf, :anyOf, :allOf].each do |key|
+        # Check anyOf/allOf (oneOf should be converted to anyOf by this point)
+        [:anyOf, :allOf].each do |key|
           if schema[key].is_a?(Array)
             schema[key].each do |sub_schema|
               if sub_schema.is_a?(Hash)
@@ -291,8 +321,8 @@ module DSPy
       def self.contains_pattern_properties?(schema)
         return true if schema[:patternProperties]
 
-        # Recursively check nested schemas
-        [:properties, :items, :oneOf, :anyOf, :allOf].each do |key|
+        # Recursively check nested schemas (oneOf should be converted to anyOf by this point)
+        [:properties, :items, :anyOf, :allOf].each do |key|
           value = schema[key]
           case value
           when Hash
@@ -309,8 +339,8 @@ module DSPy
       def self.contains_conditional_schemas?(schema)
         return true if schema[:if] || schema[:then] || schema[:else]
 
-        # Recursively check nested schemas
-        [:properties, :items, :oneOf, :anyOf, :allOf].each do |key|
+        # Recursively check nested schemas (oneOf should be converted to anyOf by this point)
+        [:properties, :items, :anyOf, :allOf].each do |key|
           value = schema[key]
           case value
           when Hash