dspy 0.30.1 → 0.31.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +51 -64
- data/lib/dspy/evals.rb +21 -2
- data/lib/dspy/lm/adapter_factory.rb +40 -17
- data/lib/dspy/lm/errors.rb +3 -0
- data/lib/dspy/lm/json_strategy.rb +24 -8
- data/lib/dspy/lm.rb +62 -19
- data/lib/dspy/mixins/type_coercion.rb +2 -0
- data/lib/dspy/module.rb +6 -6
- data/lib/dspy/prompt.rb +207 -36
- data/lib/dspy/re_act.rb +50 -17
- data/lib/dspy/schema/sorbet_json_schema.rb +5 -2
- data/lib/dspy/schema/sorbet_toon_adapter.rb +81 -0
- data/lib/dspy/structured_outputs_prompt.rb +5 -3
- data/lib/dspy/type_serializer.rb +2 -1
- data/lib/dspy/version.rb +1 -1
- metadata +14 -51
- data/lib/dspy/lm/adapters/anthropic_adapter.rb +0 -291
- data/lib/dspy/lm/adapters/gemini/schema_converter.rb +0 -186
- data/lib/dspy/lm/adapters/gemini_adapter.rb +0 -220
- data/lib/dspy/lm/adapters/ollama_adapter.rb +0 -73
- data/lib/dspy/lm/adapters/openai/schema_converter.rb +0 -359
- data/lib/dspy/lm/adapters/openai_adapter.rb +0 -188
- data/lib/dspy/lm/adapters/openrouter_adapter.rb +0 -68
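For reference, picking up the new release with Bundler is a one-line change (a minimal sketch, assuming a Bundler-managed project; only the gem name and version come from the header above):

    # Gemfile
    gem 'dspy', '~> 0.31.1'

followed by `bundle update dspy`.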
data/lib/dspy/lm/adapters/gemini_adapter.rb
@@ -1,220 +0,0 @@
-# frozen_string_literal: true
-
-require 'gemini-ai'
-require 'json'
-require_relative '../vision_models'
-
-module DSPy
-  class LM
-    class GeminiAdapter < Adapter
-      def initialize(model:, api_key:, structured_outputs: false)
-        super(model: model, api_key: api_key)
-        validate_api_key!(api_key, 'gemini')
-
-        @structured_outputs_enabled = structured_outputs
-
-        # Disable streaming for VCR tests since SSE responses don't record properly
-        # But keep streaming enabled for SSEVCR tests (SSE-specific cassettes)
-        @use_streaming = true
-        begin
-          vcr_active = defined?(VCR) && VCR.current_cassette
-          ssevcr_active = defined?(SSEVCR) && SSEVCR.turned_on?
-
-          # Only disable streaming if regular VCR is active but SSEVCR is not
-          @use_streaming = false if vcr_active && !ssevcr_active
-        rescue
-          # If VCR/SSEVCR is not available or any error occurs, use streaming
-          @use_streaming = true
-        end
-
-        @client = Gemini.new(
-          credentials: {
-            service: 'generative-language-api',
-            api_key: api_key,
-            version: 'v1beta' # Use beta API version for structured outputs support
-          },
-          options: {
-            model: model,
-            server_sent_events: @use_streaming
-          }
-        )
-      end
-
-      def chat(messages:, signature: nil, **extra_params, &block)
-        normalized_messages = normalize_messages(messages)
-
-        # Validate vision support if images are present
-        if contains_images?(normalized_messages)
-          VisionModels.validate_vision_support!('gemini', model)
-          # Convert messages to Gemini format with proper image handling
-          normalized_messages = format_multimodal_messages(normalized_messages)
-        end
-
-        # Convert DSPy message format to Gemini format
-        gemini_messages = convert_messages_to_gemini_format(normalized_messages)
-
-        request_params = {
-          contents: gemini_messages
-        }.merge(extra_params)
-
-        begin
-          content = ""
-          final_response_data = nil
-
-          # Check if we're using streaming or not
-          if @use_streaming
-            # Streaming mode
-            @client.stream_generate_content(request_params) do |chunk|
-              # Handle case where chunk might be a string (from SSE VCR)
-              if chunk.is_a?(String)
-                begin
-                  chunk = JSON.parse(chunk)
-                rescue JSON::ParserError => e
-                  raise AdapterError, "Failed to parse Gemini streaming response: #{e.message}"
-                end
-              end
-
-              # Extract content from chunks
-              if chunk.dig('candidates', 0, 'content', 'parts')
-                chunk_text = extract_text_from_parts(chunk.dig('candidates', 0, 'content', 'parts'))
-                content += chunk_text
-
-                # Call block only if provided (for real streaming)
-                block.call(chunk) if block_given?
-              end
-
-              # Store final response data (usage, metadata) from last chunk
-              if chunk['usageMetadata'] || chunk.dig('candidates', 0, 'finishReason')
-                final_response_data = chunk
-              end
-            end
-          else
-            # Non-streaming mode (for VCR tests)
-            response = @client.generate_content(request_params)
-
-            # Extract content from single response
-            if response.dig('candidates', 0, 'content', 'parts')
-              content = extract_text_from_parts(response.dig('candidates', 0, 'content', 'parts'))
-            end
-
-            # Use response as final data
-            final_response_data = response
-          end
-
-          # Extract usage information from final chunk
-          usage_data = final_response_data&.dig('usageMetadata')
-          usage_struct = usage_data ? UsageFactory.create('gemini', usage_data) : nil
-
-          # Create metadata from final chunk
-          metadata = {
-            provider: 'gemini',
-            model: model,
-            finish_reason: final_response_data&.dig('candidates', 0, 'finishReason'),
-            safety_ratings: final_response_data&.dig('candidates', 0, 'safetyRatings'),
-            streaming: block_given?
-          }
-
-          # Create typed metadata
-          typed_metadata = ResponseMetadataFactory.create('gemini', metadata)
-
-          Response.new(
-            content: content,
-            usage: usage_struct,
-            metadata: typed_metadata
-          )
-        rescue => e
-          handle_gemini_error(e)
-        end
-      end
-
-      private
-
-      # Convert DSPy message format to Gemini format
-      def convert_messages_to_gemini_format(messages)
-        # Gemini expects contents array with role and parts
-        messages.map do |msg|
-          role = case msg[:role]
-                 when 'system'
-                   'user' # Gemini doesn't have explicit system role, merge with user
-                 when 'assistant'
-                   'model'
-                 else
-                   msg[:role]
-                 end
-
-          if msg[:content].is_a?(Array)
-            # Multimodal content
-            parts = msg[:content].map do |item|
-              case item[:type]
-              when 'text'
-                { text: item[:text] }
-              when 'image'
-                item[:image].to_gemini_format
-              else
-                item
-              end
-            end
-
-            { role: role, parts: parts }
-          else
-            # Text-only content
-            { role: role, parts: [{ text: msg[:content] }] }
-          end
-        end
-      end
-
-      # Extract text content from Gemini parts array
-      def extract_text_from_parts(parts)
-        return "" unless parts.is_a?(Array)
-
-        parts.map { |part| part['text'] }.compact.join
-      end
-
-      # Format multimodal messages for Gemini
-      def format_multimodal_messages(messages)
-        messages.map do |msg|
-          if msg[:content].is_a?(Array)
-            # Convert multimodal content to Gemini format
-            formatted_content = msg[:content].map do |item|
-              case item[:type]
-              when 'text'
-                { type: 'text', text: item[:text] }
-              when 'image'
-                # Validate image compatibility before formatting
-                item[:image].validate_for_provider!('gemini')
-                item[:image].to_gemini_format
-              else
-                item
-              end
-            end
-
-            {
-              role: msg[:role],
-              content: formatted_content
-            }
-          else
-            msg
-          end
-        end
-      end
-
-      # Handle Gemini-specific errors
-      def handle_gemini_error(error)
-        error_msg = error.message.to_s
-
-        if error_msg.include?('API_KEY') || error_msg.include?('status 400') || error_msg.include?('status 401') || error_msg.include?('status 403')
-          raise AdapterError, "Gemini authentication failed: #{error_msg}. Check your API key."
-        elsif error_msg.include?('RATE_LIMIT') || error_msg.downcase.include?('quota') || error_msg.include?('status 429')
-          raise AdapterError, "Gemini rate limit exceeded: #{error_msg}. Please wait and try again."
-        elsif error_msg.include?('SAFETY') || error_msg.include?('blocked')
-          raise AdapterError, "Gemini content was blocked by safety filters: #{error_msg}"
-        elsif error_msg.include?('image') || error_msg.include?('media')
-          raise AdapterError, "Gemini image processing failed: #{error_msg}. Ensure your image is a valid format and under size limits."
-        else
-          # Generic error handling
-          raise AdapterError, "Gemini adapter error: #{error_msg}"
-        end
-      end
-    end
-  end
-end
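The removed GeminiAdapter's core job was translating DSPy-style chat messages into Gemini's contents/parts shape, folding the system role into user and renaming assistant to model. Below is a self-contained sketch of that mapping for text-only messages; the method name to_gemini_contents is hypothetical and not part of dspy, it only restates the deleted convert_messages_to_gemini_format logic.

    # Hypothetical standalone illustration of the role/parts mapping the deleted adapter performed.
    def to_gemini_contents(messages)
      messages.map do |msg|
        role = case msg[:role]
               when 'system'    then 'user'   # Gemini has no system role; fold into user
               when 'assistant' then 'model'
               else msg[:role]
               end
        { role: role, parts: [{ text: msg[:content] }] }
      end
    end

    to_gemini_contents([
      { role: 'system', content: 'Answer briefly.' },
      { role: 'user', content: 'What is DSPy?' }
    ])
    # => [{ role: 'user', parts: [{ text: 'Answer briefly.' }] },
    #     { role: 'user', parts: [{ text: 'What is DSPy?' }] }]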
data/lib/dspy/lm/adapters/ollama_adapter.rb
@@ -1,73 +0,0 @@
-# frozen_string_literal: true
-
-require 'openai'
-
-module DSPy
-  class LM
-    class OllamaAdapter < OpenAIAdapter
-      DEFAULT_BASE_URL = 'http://localhost:11434/v1'
-
-      def initialize(model:, api_key: nil, base_url: nil, structured_outputs: true)
-        # Ollama doesn't require API key for local instances
-        # But may need it for remote/protected instances
-        api_key ||= 'ollama' # OpenAI client requires non-empty key
-        base_url ||= DEFAULT_BASE_URL
-
-        # Store base_url before calling super
-        @base_url = base_url
-
-        # Don't call parent's initialize, do it manually to control client creation
-        @model = model
-        @api_key = api_key
-        @structured_outputs_enabled = structured_outputs
-        validate_configuration!
-
-        # Create client with custom base URL
-        @client = OpenAI::Client.new(
-          api_key: @api_key,
-          base_url: @base_url
-        )
-      end
-
-      def chat(messages:, signature: nil, response_format: nil, &block)
-        # For Ollama, we need to be more lenient with structured outputs
-        # as it may not fully support OpenAI's response_format spec
-        begin
-          super
-        rescue => e
-          # If structured output fails, retry with enhanced prompting
-          if @structured_outputs_enabled && signature && e.message.include?('response_format')
-            DSPy.logger.debug("Ollama structured output failed, falling back to enhanced prompting")
-            @structured_outputs_enabled = false
-            retry
-          else
-            raise
-          end
-        end
-      end
-
-      private
-
-      def validate_configuration!
-        super
-        # Additional Ollama-specific validation could go here
-      end
-
-      def validate_api_key!(api_key, provider)
-        # For Ollama, API key is optional for local instances
-        # Only validate if it looks like a remote URL
-        if @base_url && !@base_url.include?('localhost') && !@base_url.include?('127.0.0.1')
-          super
-        end
-      end
-
-
-      # Ollama may have different model support for structured outputs
-      def supports_structured_outputs?
-        # For now, assume all Ollama models support basic JSON mode
-        # but may not support full OpenAI structured output spec
-        true
-      end
-    end
-  end
-end
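The removed OllamaAdapter was a thin wrapper over the OpenAI adapter: it defaulted the endpoint to Ollama's OpenAI-compatible API and supplied a placeholder key because the client refuses empty keys. A sketch mirroring the constructor call in the deleted file; whether your installed openai gem accepts these exact keywords depends on its version, so treat this as illustrative rather than an API guarantee.

    require 'openai'

    base_url = 'http://localhost:11434/v1'   # Ollama's OpenAI-compatible endpoint (DEFAULT_BASE_URL above)
    client = OpenAI::Client.new(
      api_key: 'ollama',   # placeholder; a local Ollama server ignores it
      base_url: base_url
    )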
data/lib/dspy/lm/adapters/openai/schema_converter.rb
@@ -1,359 +0,0 @@
-# frozen_string_literal: true
-
-require "sorbet-runtime"
-
-module DSPy
-  class LM
-    module Adapters
-      module OpenAI
-        # Converts DSPy signatures to OpenAI structured output format
-        class SchemaConverter
-          extend T::Sig
-
-          # Models that support structured outputs as of July 2025
-          STRUCTURED_OUTPUT_MODELS = T.let([
-            "gpt-4o-mini",
-            "gpt-4o-2024-08-06",
-            "gpt-4o",
-            "gpt-4-turbo",
-            "gpt-4-turbo-2024-04-09"
-          ].freeze, T::Array[String])
-
-          sig { params(signature_class: T.class_of(DSPy::Signature), name: T.nilable(String), strict: T::Boolean).returns(T::Hash[Symbol, T.untyped]) }
-          def self.to_openai_format(signature_class, name: nil, strict: true)
-            # Get the output JSON schema from the signature class
-            output_schema = signature_class.output_json_schema
-
-            # Convert oneOf to anyOf where safe, or raise error for unsupported cases
-            output_schema = convert_oneof_to_anyof_if_safe(output_schema)
-
-            # Build the complete schema with OpenAI-specific modifications
-            dspy_schema = {
-              "$schema": "http://json-schema.org/draft-06/schema#",
-              type: "object",
-              properties: output_schema[:properties] || {},
-              required: openai_required_fields(signature_class, output_schema)
-            }
-
-            # Generate a schema name if not provided
-            schema_name = name || generate_schema_name(signature_class)
-
-            # Remove the $schema field as OpenAI doesn't use it
-            openai_schema = dspy_schema.except(:$schema)
-
-            # Add additionalProperties: false for strict mode and fix nested struct schemas
-            if strict
-              openai_schema = add_additional_properties_recursively(openai_schema)
-              openai_schema = fix_nested_struct_required_fields(openai_schema)
-            end
-
-            # Wrap in OpenAI's required format
-            {
-              type: "json_schema",
-              json_schema: {
-                name: schema_name,
-                strict: strict,
-                schema: openai_schema
-              }
-            }
-          end
-
-          # Convert oneOf to anyOf if safe (discriminated unions), otherwise raise error
-          sig { params(schema: T.untyped).returns(T.untyped) }
-          def self.convert_oneof_to_anyof_if_safe(schema)
-            return schema unless schema.is_a?(Hash)
-
-            result = schema.dup
-
-            # Check if this schema has oneOf that we can safely convert
-            if result[:oneOf]
-              if all_have_discriminators?(result[:oneOf])
-                # Safe to convert - discriminators ensure mutual exclusivity
-                result[:anyOf] = result.delete(:oneOf).map { |s| convert_oneof_to_anyof_if_safe(s) }
-              else
-                # Unsafe conversion - raise error
-                raise DSPy::UnsupportedSchemaError.new(
-                  "OpenAI structured outputs do not support oneOf schemas without discriminator fields. " \
-                  "The schema contains union types that cannot be safely converted to anyOf. " \
-                  "Please use enhanced_prompting strategy instead or add discriminator fields to union types."
-                )
-              end
-            end
-
-            # Recursively process nested schemas
-            if result[:properties].is_a?(Hash)
-              result[:properties] = result[:properties].transform_values { |v| convert_oneof_to_anyof_if_safe(v) }
-            end
-
-            if result[:items].is_a?(Hash)
-              result[:items] = convert_oneof_to_anyof_if_safe(result[:items])
-            end
-
-            # Process arrays of schema items
-            if result[:items].is_a?(Array)
-              result[:items] = result[:items].map { |item|
-                item.is_a?(Hash) ? convert_oneof_to_anyof_if_safe(item) : item
-              }
-            end
-
-            # Process anyOf arrays (in case there are nested oneOf within anyOf)
-            if result[:anyOf].is_a?(Array)
-              result[:anyOf] = result[:anyOf].map { |item|
-                item.is_a?(Hash) ? convert_oneof_to_anyof_if_safe(item) : item
-              }
-            end
-
-            result
-          end
-
-          # Check if all schemas in a oneOf array have discriminator fields (const properties)
-          sig { params(schemas: T::Array[T.untyped]).returns(T::Boolean) }
-          def self.all_have_discriminators?(schemas)
-            schemas.all? do |schema|
-              next false unless schema.is_a?(Hash)
-              next false unless schema[:properties].is_a?(Hash)
-
-              # Check if any property has a const value (our discriminator pattern)
-              schema[:properties].any? { |_, prop| prop.is_a?(Hash) && prop[:const] }
-            end
-          end
-
-          sig { params(model: String).returns(T::Boolean) }
-          def self.supports_structured_outputs?(model)
-            # Extract base model name without provider prefix
-            base_model = model.sub(/^openai\//, "")
-
-            # Check if it's a supported model or a newer version
-            STRUCTURED_OUTPUT_MODELS.any? { |supported| base_model.start_with?(supported) }
-          end
-
-          sig { params(schema: T::Hash[Symbol, T.untyped]).returns(T::Array[String]) }
-          def self.validate_compatibility(schema)
-            issues = []
-
-            # Check for deeply nested objects (OpenAI has depth limits)
-            depth = calculate_depth(schema)
-            if depth > 5
-              issues << "Schema depth (#{depth}) exceeds recommended limit of 5 levels"
-            end
-
-            # Check for unsupported JSON Schema features
-            if contains_pattern_properties?(schema)
-              issues << "Pattern properties are not supported in OpenAI structured outputs"
-            end
-
-            if contains_conditional_schemas?(schema)
-              issues << "Conditional schemas (if/then/else) are not supported"
-            end
-
-            issues
-          end
-
-          private
-
-          # OpenAI structured outputs requires ALL properties to be in the required array
-          # For T.nilable fields without defaults, we warn the user and mark as required
-          sig { params(signature_class: T.class_of(DSPy::Signature), output_schema: T::Hash[Symbol, T.untyped]).returns(T::Array[String]) }
-          def self.openai_required_fields(signature_class, output_schema)
-            all_properties = output_schema[:properties]&.keys || []
-            original_required = output_schema[:required] || []
-
-            # For OpenAI structured outputs, we need ALL properties to be required
-            # but warn about T.nilable fields without defaults
-            field_descriptors = signature_class.instance_variable_get(:@output_field_descriptors) || {}
-
-            all_properties.each do |property_name|
-              descriptor = field_descriptors[property_name.to_sym]
-
-              # If field is not originally required and doesn't have a default
-              if !original_required.include?(property_name.to_s) && descriptor && !descriptor.has_default
-                DSPy.logger.warn(
-                  "OpenAI structured outputs: T.nilable field '#{property_name}' without default will be marked as required. " \
-                  "Consider adding a default value or using a different provider for optional fields."
-                )
-              end
-            end
-
-            # Return all properties as required (OpenAI requirement)
-            all_properties.map(&:to_s)
-          end
-
-          # Fix nested struct schemas to include all properties in required array (OpenAI requirement)
-          sig { params(schema: T::Hash[Symbol, T.untyped]).returns(T::Hash[Symbol, T.untyped]) }
-          def self.fix_nested_struct_required_fields(schema)
-            return schema unless schema.is_a?(Hash)
-
-            result = schema.dup
-
-            # If this is an object with properties, make all properties required
-            if result[:type] == "object" && result[:properties].is_a?(Hash)
-              all_property_names = result[:properties].keys.map(&:to_s)
-              result[:required] = all_property_names unless result[:required] == all_property_names
-            end
-
-            # Process nested objects recursively
-            if result[:properties].is_a?(Hash)
-              result[:properties] = result[:properties].transform_values do |prop|
-                if prop.is_a?(Hash)
-                  processed = fix_nested_struct_required_fields(prop)
-                  # Handle arrays with object items
-                  if processed[:type] == "array" && processed[:items].is_a?(Hash)
-                    processed[:items] = fix_nested_struct_required_fields(processed[:items])
-                  end
-                  processed
-                else
-                  prop
-                end
-              end
-            end
-
-            result
-          end
-
-          sig { params(schema: T::Hash[Symbol, T.untyped]).returns(T::Hash[Symbol, T.untyped]) }
-          def self.add_additional_properties_recursively(schema)
-            return schema unless schema.is_a?(Hash)
-
-            result = schema.dup
-
-            # Add additionalProperties: false if this is an object
-            if result[:type] == "object"
-              result[:additionalProperties] = false
-            end
-
-            # Process properties recursively
-            if result[:properties].is_a?(Hash)
-              result[:properties] = result[:properties].transform_values do |prop|
-                if prop.is_a?(Hash)
-                  processed = add_additional_properties_recursively(prop)
-                  # Special handling for arrays - ensure their items have additionalProperties if they're objects
-                  if processed[:type] == "array" && processed[:items].is_a?(Hash)
-                    processed[:items] = add_additional_properties_recursively(processed[:items])
-                  end
-                  processed
-                else
-                  prop
-                end
-              end
-            end
-
-            # Process array items
-            if result[:items].is_a?(Hash)
-              processed_items = add_additional_properties_recursively(result[:items])
-              # OpenAI requires additionalProperties on all objects, even in array items
-              if processed_items.is_a?(Hash) && processed_items[:type] == "object" && !processed_items.key?(:additionalProperties)
-                processed_items[:additionalProperties] = false
-              end
-              result[:items] = processed_items
-            elsif result[:items].is_a?(Array)
-              # Handle tuple validation
-              result[:items] = result[:items].map do |item|
-                processed = item.is_a?(Hash) ? add_additional_properties_recursively(item) : item
-                if processed.is_a?(Hash) && processed[:type] == "object" && !processed.key?(:additionalProperties)
-                  processed[:additionalProperties] = false
-                end
-                processed
-              end
-            end
-
-            # Process anyOf/allOf (oneOf should be converted to anyOf by this point)
-            [:anyOf, :allOf].each do |key|
-              if result[key].is_a?(Array)
-                result[key] = result[key].map do |sub_schema|
-                  sub_schema.is_a?(Hash) ? add_additional_properties_recursively(sub_schema) : sub_schema
-                end
-              end
-            end
-
-            result
-          end
-
-          sig { params(signature_class: T.class_of(DSPy::Signature)).returns(String) }
-          def self.generate_schema_name(signature_class)
-            # Use the signature class name
-            class_name = signature_class.name&.split("::")&.last
-            if class_name
-              class_name.gsub(/[^a-zA-Z0-9_]/, "_").downcase
-            else
-              # Fallback to a generic name
-              "dspy_output_#{Time.now.to_i}"
-            end
-          end
-
-          sig { params(schema: T::Hash[Symbol, T.untyped], current_depth: Integer).returns(Integer) }
-          def self.calculate_depth(schema, current_depth = 0)
-            return current_depth unless schema.is_a?(Hash)
-
-            max_depth = current_depth
-
-            # Check properties
-            if schema[:properties].is_a?(Hash)
-              schema[:properties].each_value do |prop|
-                if prop.is_a?(Hash)
-                  prop_depth = calculate_depth(prop, current_depth + 1)
-                  max_depth = [max_depth, prop_depth].max
-                end
-              end
-            end
-
-            # Check array items
-            if schema[:items].is_a?(Hash)
-              items_depth = calculate_depth(schema[:items], current_depth + 1)
-              max_depth = [max_depth, items_depth].max
-            end
-
-            # Check anyOf/allOf (oneOf should be converted to anyOf by this point)
-            [:anyOf, :allOf].each do |key|
-              if schema[key].is_a?(Array)
-                schema[key].each do |sub_schema|
-                  if sub_schema.is_a?(Hash)
-                    sub_depth = calculate_depth(sub_schema, current_depth + 1)
-                    max_depth = [max_depth, sub_depth].max
-                  end
-                end
-              end
-            end
-
-            max_depth
-          end
-
-          sig { params(schema: T::Hash[Symbol, T.untyped]).returns(T::Boolean) }
-          def self.contains_pattern_properties?(schema)
-            return true if schema[:patternProperties]
-
-            # Recursively check nested schemas (oneOf should be converted to anyOf by this point)
-            [:properties, :items, :anyOf, :allOf].each do |key|
-              value = schema[key]
-              case value
-              when Hash
-                return true if contains_pattern_properties?(value)
-              when Array
-                return true if value.any? { |v| v.is_a?(Hash) && contains_pattern_properties?(v) }
-              end
-            end
-
-            false
-          end
-
-          sig { params(schema: T::Hash[Symbol, T.untyped]).returns(T::Boolean) }
-          def self.contains_conditional_schemas?(schema)
-            return true if schema[:if] || schema[:then] || schema[:else]
-
-            # Recursively check nested schemas (oneOf should be converted to anyOf by this point)
-            [:properties, :items, :anyOf, :allOf].each do |key|
-              value = schema[key]
-              case value
-              when Hash
-                return true if contains_conditional_schemas?(value)
-              when Array
-                return true if value.any? { |v| v.is_a?(Hash) && contains_conditional_schemas?(v) }
-              end
-            end
-
-            false
-          end
-        end
-      end
-    end
-  end
-end
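For orientation, the removed SchemaConverter ultimately emitted OpenAI's response_format envelope: a json_schema wrapper around the signature's output schema, with every property forced into required and additionalProperties pinned to false in strict mode. An illustrative value of that shape, reconstructed from the deleted code above; the names classify_text and label are made up for the example:

    response_format = {
      type: "json_schema",
      json_schema: {
        name: "classify_text",           # derived from the signature class name
        strict: true,
        schema: {
          type: "object",
          properties: { label: { type: "string" } },
          required: ["label"],           # strict mode: every property must be listed as required
          additionalProperties: false
        }
      }
    }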