dspy 0.27.1 → 0.27.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dspy/chain_of_thought.rb +29 -37
- data/lib/dspy/code_act.rb +2 -2
- data/lib/dspy/context.rb +87 -34
- data/lib/dspy/errors.rb +2 -0
- data/lib/dspy/lm/adapters/gemini/schema_converter.rb +37 -35
- data/lib/dspy/lm/adapters/gemini_adapter.rb +45 -21
- data/lib/dspy/lm/adapters/openai/schema_converter.rb +70 -40
- data/lib/dspy/lm/adapters/openai_adapter.rb +35 -8
- data/lib/dspy/lm/retry_handler.rb +15 -6
- data/lib/dspy/lm/strategies/gemini_structured_output_strategy.rb +21 -8
- data/lib/dspy/lm.rb +54 -11
- data/lib/dspy/memory/local_embedding_engine.rb +27 -11
- data/lib/dspy/memory/memory_manager.rb +26 -9
- data/lib/dspy/mixins/type_coercion.rb +96 -3
- data/lib/dspy/module.rb +20 -2
- data/lib/dspy/observability/observation_type.rb +65 -0
- data/lib/dspy/observability.rb +7 -0
- data/lib/dspy/predict.rb +27 -37
- data/lib/dspy/re_act.rb +94 -35
- data/lib/dspy/signature.rb +12 -0
- data/lib/dspy/tools/base.rb +57 -85
- data/lib/dspy/tools/github_cli_toolset.rb +330 -0
- data/lib/dspy/tools/toolset.rb +33 -60
- data/lib/dspy/type_system/sorbet_json_schema.rb +263 -0
- data/lib/dspy/version.rb +1 -1
- data/lib/dspy.rb +1 -0
- metadata +5 -3
- data/lib/dspy/lm/cache_manager.rb +0 -151
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d697eb8eb574ca5c23914c1911f1d7a03ad7411aa83b19bedf2231cacc544460
|
4
|
+
data.tar.gz: 3086cbaa86d01b0dd09512c9f5893f8a31b8d9988eed6782a967c24e1c12fb01
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eae9e4cba177e6cea359f1ffd55ebaa4203cc5a6594b86ab5fc2b9b9e8c54cf24838a8d18102ab96fc9a8f4b85827c3cb02ef0b75c3d077c6c70301abb52f48d
|
7
|
+
data.tar.gz: ecc26be5f85df66e911a71d5a1fa878cc42c79c7d63e42b2d1440836859814837f4ba782db18584161598867a5aef5ebbfef056a2988b4208767b8e0c1999013
|
@@ -83,46 +83,38 @@ module DSPy
|
|
83
83
|
sig { returns(T.class_of(DSPy::Signature)) }
|
84
84
|
attr_reader :original_signature
|
85
85
|
|
86
|
-
# Override forward_untyped to add ChainOfThought-specific analysis
|
86
|
+
# Override forward_untyped to add ChainOfThought-specific analysis
|
87
|
+
# Let Module#forward handle the ChainOfThought span creation automatically
|
87
88
|
sig { override.params(input_values: T.untyped).returns(T.untyped) }
|
88
89
|
def forward_untyped(**input_values)
|
89
|
-
#
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
#
|
102
|
-
if
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
span.set_attribute('langfuse.observation.output', DSPy::Utils::Serialization.to_json(output_with_reasoning))
|
112
|
-
|
113
|
-
# Add reasoning metrics
|
114
|
-
if prediction_result.respond_to?(:reasoning) && prediction_result.reasoning
|
115
|
-
span.set_attribute('cot.reasoning_length', prediction_result.reasoning.length)
|
116
|
-
span.set_attribute('cot.has_reasoning', true)
|
117
|
-
span.set_attribute('cot.reasoning_steps', count_reasoning_steps(prediction_result.reasoning))
|
118
|
-
end
|
90
|
+
# Create a Predict instance and call its forward method (which will create Predict span via Module#forward)
|
91
|
+
# We can't call super.forward because that would go to Module#forward_untyped, not Module#forward
|
92
|
+
|
93
|
+
# Create a temporary Predict instance with our enhanced signature to get the prediction
|
94
|
+
predict_instance = DSPy::Predict.new(@signature_class)
|
95
|
+
predict_instance.config.lm = self.lm # Use the same LM configuration
|
96
|
+
|
97
|
+
# Call predict's forward method, which will create the Predict span
|
98
|
+
prediction_result = predict_instance.forward(**input_values)
|
99
|
+
|
100
|
+
# Add ChainOfThought-specific analysis and events
|
101
|
+
if DSPy::Observability.enabled? && prediction_result
|
102
|
+
# Add reasoning metrics via events
|
103
|
+
if prediction_result.respond_to?(:reasoning) && prediction_result.reasoning
|
104
|
+
DSPy.event('chain_of_thought.reasoning_metrics', {
|
105
|
+
'cot.reasoning_length' => prediction_result.reasoning.length,
|
106
|
+
'cot.has_reasoning' => true,
|
107
|
+
'cot.reasoning_steps' => count_reasoning_steps(prediction_result.reasoning),
|
108
|
+
'dspy.module_type' => 'chain_of_thought',
|
109
|
+
'dspy.signature' => @original_signature.name
|
110
|
+
})
|
119
111
|
end
|
120
|
-
|
121
|
-
# Analyze reasoning (emits events for backwards compatibility)
|
122
|
-
analyze_reasoning(prediction_result)
|
123
|
-
|
124
|
-
prediction_result
|
125
112
|
end
|
113
|
+
|
114
|
+
# Analyze reasoning (emits events for backwards compatibility)
|
115
|
+
analyze_reasoning(prediction_result)
|
116
|
+
|
117
|
+
prediction_result
|
126
118
|
end
|
127
119
|
|
128
120
|
private
|
@@ -204,7 +196,7 @@ module DSPy
|
|
204
196
|
# Emits reasoning analysis instrumentation event
|
205
197
|
sig { params(reasoning_content: String).void }
|
206
198
|
def emit_reasoning_analysis(reasoning_content)
|
207
|
-
DSPy.
|
199
|
+
DSPy.event('chain_of_thought.reasoning_complete', {
|
208
200
|
'dspy.signature' => @original_signature.name,
|
209
201
|
'cot.reasoning_steps' => count_reasoning_steps(reasoning_content),
|
210
202
|
'cot.reasoning_length' => reasoning_content.length,
|
data/lib/dspy/code_act.rb
CHANGED
@@ -355,7 +355,7 @@ module DSPy
|
|
355
355
|
|
356
356
|
sig { params(iteration: Integer, thought: String, ruby_code: String, execution_result: T.nilable(String), error_message: T.nilable(String)).void }
|
357
357
|
def emit_iteration_complete_event(iteration, thought, ruby_code, execution_result, error_message)
|
358
|
-
DSPy.
|
358
|
+
DSPy.event('codeact.iteration_complete', {
|
359
359
|
'codeact.iteration' => iteration,
|
360
360
|
'codeact.thought' => thought,
|
361
361
|
'codeact.ruby_code' => ruby_code,
|
@@ -368,7 +368,7 @@ module DSPy
|
|
368
368
|
sig { params(iterations_count: Integer, final_answer: T.nilable(String), history: T::Array[CodeActHistoryEntry]).void }
|
369
369
|
def handle_max_iterations_if_needed(iterations_count, final_answer, history)
|
370
370
|
if iterations_count >= @max_iterations && final_answer.nil?
|
371
|
-
DSPy.
|
371
|
+
DSPy.event('codeact.max_iterations', {
|
372
372
|
'codeact.iteration_count' => iterations_count,
|
373
373
|
'codeact.max_iterations' => @max_iterations,
|
374
374
|
'codeact.final_history_length' => history.length
|
data/lib/dspy/context.rb
CHANGED
@@ -6,31 +6,41 @@ module DSPy
|
|
6
6
|
class Context
|
7
7
|
class << self
|
8
8
|
def current
|
9
|
-
# Use
|
10
|
-
#
|
11
|
-
# Fiber storage ensures OpenTelemetry context propagation
|
9
|
+
# Use Thread storage as primary source to ensure thread isolation
|
10
|
+
# Fiber storage is used for OpenTelemetry context propagation within the same thread
|
12
11
|
|
13
12
|
# Create a unique key for this thread to ensure isolation
|
14
13
|
thread_key = :"dspy_context_#{Thread.current.object_id}"
|
15
14
|
|
16
|
-
#
|
15
|
+
# Always check thread-local storage first for proper isolation
|
17
16
|
if Thread.current[thread_key]
|
18
|
-
# Thread has context, ensure fiber
|
19
|
-
Fiber[:dspy_context]
|
20
|
-
|
21
|
-
|
22
|
-
context = {
|
23
|
-
trace_id: SecureRandom.uuid,
|
24
|
-
span_stack: []
|
25
|
-
}
|
26
|
-
# Set in both Thread and Fiber storage
|
27
|
-
Thread.current[thread_key] = context
|
28
|
-
Thread.current[:dspy_context] = context # Keep for backward compatibility
|
29
|
-
Fiber[:dspy_context] = context
|
17
|
+
# Thread has context, ensure fiber inherits it for OpenTelemetry propagation
|
18
|
+
Fiber[:dspy_context] = Thread.current[thread_key]
|
19
|
+
Thread.current[:dspy_context] = Thread.current[thread_key] # Keep for backward compatibility
|
20
|
+
return Thread.current[thread_key]
|
30
21
|
end
|
31
22
|
|
32
|
-
#
|
33
|
-
|
23
|
+
# Check if current fiber has context that was set by this same thread
|
24
|
+
# This handles cases where context was set via OpenTelemetry propagation within the thread
|
25
|
+
if Fiber[:dspy_context] && Thread.current[:dspy_context] == Fiber[:dspy_context]
|
26
|
+
# This fiber context was set by this thread, safe to use
|
27
|
+
Thread.current[thread_key] = Fiber[:dspy_context]
|
28
|
+
return Fiber[:dspy_context]
|
29
|
+
end
|
30
|
+
|
31
|
+
# No existing context or context belongs to different thread - create new one
|
32
|
+
context = {
|
33
|
+
trace_id: SecureRandom.uuid,
|
34
|
+
span_stack: [],
|
35
|
+
otel_span_stack: []
|
36
|
+
}
|
37
|
+
|
38
|
+
# Set in both Thread and Fiber storage
|
39
|
+
Thread.current[thread_key] = context
|
40
|
+
Thread.current[:dspy_context] = context # Keep for backward compatibility
|
41
|
+
Fiber[:dspy_context] = context
|
42
|
+
|
43
|
+
context
|
34
44
|
end
|
35
45
|
|
36
46
|
def with_span(operation:, **attributes)
|
@@ -67,23 +77,66 @@ module DSPy
|
|
67
77
|
# Record start time for explicit duration tracking
|
68
78
|
otel_start_time = Time.now
|
69
79
|
|
70
|
-
#
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
span
|
82
|
-
|
83
|
-
|
80
|
+
# Get parent OpenTelemetry span for proper context propagation
|
81
|
+
parent_otel_span = current[:otel_span_stack].last
|
82
|
+
|
83
|
+
# Create span with proper parent context
|
84
|
+
if parent_otel_span
|
85
|
+
# Use the parent span's context to ensure proper nesting
|
86
|
+
OpenTelemetry::Trace.with_span(parent_otel_span) do
|
87
|
+
DSPy::Observability.tracer.in_span(
|
88
|
+
operation,
|
89
|
+
attributes: span_attributes,
|
90
|
+
kind: :internal
|
91
|
+
) do |span|
|
92
|
+
# Add to our OpenTelemetry span stack
|
93
|
+
current[:otel_span_stack].push(span)
|
94
|
+
|
95
|
+
begin
|
96
|
+
result = yield(span)
|
97
|
+
|
98
|
+
# Add explicit timing information to help Langfuse
|
99
|
+
if span
|
100
|
+
duration_ms = ((Time.now - otel_start_time) * 1000).round(3)
|
101
|
+
span.set_attribute('duration.ms', duration_ms)
|
102
|
+
span.set_attribute('langfuse.observation.startTime', otel_start_time.iso8601(3))
|
103
|
+
span.set_attribute('langfuse.observation.endTime', Time.now.iso8601(3))
|
104
|
+
end
|
105
|
+
|
106
|
+
result
|
107
|
+
ensure
|
108
|
+
# Remove from our OpenTelemetry span stack
|
109
|
+
current[:otel_span_stack].pop
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
113
|
+
else
|
114
|
+
# Root span - no parent context needed
|
115
|
+
DSPy::Observability.tracer.in_span(
|
116
|
+
operation,
|
117
|
+
attributes: span_attributes,
|
118
|
+
kind: :internal
|
119
|
+
) do |span|
|
120
|
+
# Add to our OpenTelemetry span stack
|
121
|
+
current[:otel_span_stack].push(span)
|
122
|
+
|
123
|
+
begin
|
124
|
+
result = yield(span)
|
125
|
+
|
126
|
+
# Add explicit timing information to help Langfuse
|
127
|
+
if span
|
128
|
+
duration_ms = ((Time.now - otel_start_time) * 1000).round(3)
|
129
|
+
span.set_attribute('duration.ms', duration_ms)
|
130
|
+
span.set_attribute('langfuse.observation.startTime', otel_start_time.iso8601(3))
|
131
|
+
span.set_attribute('langfuse.observation.endTime', Time.now.iso8601(3))
|
132
|
+
end
|
133
|
+
|
134
|
+
result
|
135
|
+
ensure
|
136
|
+
# Remove from our OpenTelemetry span stack
|
137
|
+
current[:otel_span_stack].pop
|
138
|
+
end
|
84
139
|
end
|
85
|
-
|
86
|
-
result
|
87
140
|
end
|
88
141
|
else
|
89
142
|
yield(nil)
|
data/lib/dspy/errors.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "sorbet-runtime"
|
4
|
-
require_relative "../../cache_manager"
|
5
4
|
|
6
5
|
module DSPy
|
7
6
|
class LM
|
@@ -11,57 +10,43 @@ module DSPy
|
|
11
10
|
class SchemaConverter
|
12
11
|
extend T::Sig
|
13
12
|
|
14
|
-
# Models that support structured outputs
|
13
|
+
# Models that support structured outputs (JSON + Schema)
|
14
|
+
# Based on official Google documentation and gemini-ai gem table
|
15
15
|
STRUCTURED_OUTPUT_MODELS = T.let([
|
16
|
-
"gemini-1.5-pro",
|
17
|
-
"gemini-1.5-
|
18
|
-
"gemini-
|
16
|
+
"gemini-1.5-pro", # ✅ Full schema support (legacy)
|
17
|
+
"gemini-1.5-pro-preview-0514", # ✅ Full schema support (legacy)
|
18
|
+
"gemini-1.5-pro-preview-0409", # ✅ Full schema support (legacy)
|
19
|
+
"gemini-2.5-flash", # ✅ Full schema support (2025 current)
|
20
|
+
"gemini-2.5-flash-lite" # ✅ Full schema support (2025 current)
|
21
|
+
].freeze, T::Array[String])
|
22
|
+
|
23
|
+
# Models that support JSON mode but NOT schema
|
24
|
+
JSON_ONLY_MODELS = T.let([
|
25
|
+
"gemini-pro", # 🟡 JSON only, no schema
|
26
|
+
"gemini-1.5-flash", # 🟡 JSON only, no schema (legacy)
|
27
|
+
"gemini-1.5-flash-preview-0514", # 🟡 JSON only, no schema (legacy)
|
28
|
+
"gemini-1.0-pro-002", # 🟡 JSON only, no schema
|
29
|
+
"gemini-1.0-pro", # 🟡 JSON only, no schema
|
30
|
+
"gemini-2.0-flash-001", # 🟡 JSON only, no schema (2025)
|
31
|
+
"gemini-2.0-flash-lite-001" # 🟡 JSON only, no schema (2025)
|
19
32
|
].freeze, T::Array[String])
|
20
33
|
|
21
34
|
sig { params(signature_class: T.class_of(DSPy::Signature)).returns(T::Hash[Symbol, T.untyped]) }
|
22
35
|
def self.to_gemini_format(signature_class)
|
23
|
-
# Check cache first
|
24
|
-
cache_manager = DSPy::LM.cache_manager
|
25
|
-
cached_schema = cache_manager.get_schema(signature_class, "gemini", {})
|
26
|
-
|
27
|
-
if cached_schema
|
28
|
-
DSPy.logger.debug("Using cached schema for #{signature_class.name}")
|
29
|
-
return cached_schema
|
30
|
-
end
|
31
|
-
|
32
36
|
# Get the output JSON schema from the signature class
|
33
37
|
output_schema = signature_class.output_json_schema
|
34
38
|
|
35
39
|
# Convert to Gemini format (OpenAPI 3.0 Schema subset - not related to OpenAI)
|
36
|
-
|
37
|
-
|
38
|
-
# Cache the result
|
39
|
-
cache_manager.cache_schema(signature_class, "gemini", gemini_schema, {})
|
40
|
-
|
41
|
-
gemini_schema
|
40
|
+
convert_dspy_schema_to_gemini(output_schema)
|
42
41
|
end
|
43
42
|
|
44
43
|
sig { params(model: String).returns(T::Boolean) }
|
45
44
|
def self.supports_structured_outputs?(model)
|
46
|
-
# Check cache first
|
47
|
-
cache_manager = DSPy::LM.cache_manager
|
48
|
-
cached_result = cache_manager.get_capability(model, "structured_outputs")
|
49
|
-
|
50
|
-
if !cached_result.nil?
|
51
|
-
DSPy.logger.debug("Using cached capability check for #{model}")
|
52
|
-
return cached_result
|
53
|
-
end
|
54
|
-
|
55
45
|
# Extract base model name without provider prefix
|
56
46
|
base_model = model.sub(/^gemini\//, "")
|
57
47
|
|
58
48
|
# Check if it's a supported model or a newer version
|
59
|
-
|
60
|
-
|
61
|
-
# Cache the result
|
62
|
-
cache_manager.cache_capability(model, "structured_outputs", result)
|
63
|
-
|
64
|
-
result
|
49
|
+
STRUCTURED_OUTPUT_MODELS.any? { |supported| base_model.start_with?(supported) }
|
65
50
|
end
|
66
51
|
|
67
52
|
sig { params(schema: T::Hash[Symbol, T.untyped]).returns(T::Array[String]) }
|
@@ -81,6 +66,8 @@ module DSPy
|
|
81
66
|
|
82
67
|
sig { params(dspy_schema: T::Hash[Symbol, T.untyped]).returns(T::Hash[Symbol, T.untyped]) }
|
83
68
|
def self.convert_dspy_schema_to_gemini(dspy_schema)
|
69
|
+
# For Gemini's responseJsonSchema, we need pure JSON Schema format
|
70
|
+
# Remove OpenAPI-specific fields like "$schema"
|
84
71
|
result = {
|
85
72
|
type: "object",
|
86
73
|
properties: {},
|
@@ -101,6 +88,21 @@ module DSPy
|
|
101
88
|
|
102
89
|
sig { params(property_schema: T::Hash[Symbol, T.untyped]).returns(T::Hash[Symbol, T.untyped]) }
|
103
90
|
def self.convert_property_to_gemini(property_schema)
|
91
|
+
# Handle oneOf/anyOf schemas (union types) - Gemini supports these in responseJsonSchema
|
92
|
+
if property_schema[:oneOf]
|
93
|
+
return {
|
94
|
+
oneOf: property_schema[:oneOf].map { |schema| convert_property_to_gemini(schema) },
|
95
|
+
description: property_schema[:description]
|
96
|
+
}.compact
|
97
|
+
end
|
98
|
+
|
99
|
+
if property_schema[:anyOf]
|
100
|
+
return {
|
101
|
+
anyOf: property_schema[:anyOf].map { |schema| convert_property_to_gemini(schema) },
|
102
|
+
description: property_schema[:description]
|
103
|
+
}.compact
|
104
|
+
end
|
105
|
+
|
104
106
|
case property_schema[:type]
|
105
107
|
when "string"
|
106
108
|
result = { type: "string" }
|
@@ -13,14 +13,24 @@ module DSPy
|
|
13
13
|
|
14
14
|
@structured_outputs_enabled = structured_outputs
|
15
15
|
|
16
|
+
# Disable streaming for VCR tests since SSE responses don't record properly
|
17
|
+
@use_streaming = true
|
18
|
+
begin
|
19
|
+
@use_streaming = false if defined?(VCR) && VCR.current_cassette
|
20
|
+
rescue
|
21
|
+
# If VCR is not available or any error occurs, use streaming
|
22
|
+
@use_streaming = true
|
23
|
+
end
|
24
|
+
|
16
25
|
@client = Gemini.new(
|
17
26
|
credentials: {
|
18
27
|
service: 'generative-language-api',
|
19
|
-
api_key: api_key
|
28
|
+
api_key: api_key,
|
29
|
+
version: 'v1beta' # Use beta API version for structured outputs support
|
20
30
|
},
|
21
31
|
options: {
|
22
32
|
model: model,
|
23
|
-
server_sent_events:
|
33
|
+
server_sent_events: @use_streaming
|
24
34
|
}
|
25
35
|
)
|
26
36
|
end
|
@@ -43,33 +53,47 @@ module DSPy
|
|
43
53
|
}.merge(extra_params)
|
44
54
|
|
45
55
|
begin
|
46
|
-
# Always use streaming
|
47
56
|
content = ""
|
48
57
|
final_response_data = nil
|
49
58
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
59
|
+
# Check if we're using streaming or not
|
60
|
+
if @use_streaming
|
61
|
+
# Streaming mode
|
62
|
+
@client.stream_generate_content(request_params) do |chunk|
|
63
|
+
# Handle case where chunk might be a string (from SSE VCR)
|
64
|
+
if chunk.is_a?(String)
|
65
|
+
begin
|
66
|
+
chunk = JSON.parse(chunk)
|
67
|
+
rescue JSON::ParserError => e
|
68
|
+
raise AdapterError, "Failed to parse Gemini streaming response: #{e.message}"
|
69
|
+
end
|
57
70
|
end
|
58
|
-
end
|
59
|
-
|
60
|
-
# Extract content from chunks
|
61
|
-
if chunk.dig('candidates', 0, 'content', 'parts')
|
62
|
-
chunk_text = extract_text_from_parts(chunk.dig('candidates', 0, 'content', 'parts'))
|
63
|
-
content += chunk_text
|
64
71
|
|
65
|
-
#
|
66
|
-
|
72
|
+
# Extract content from chunks
|
73
|
+
if chunk.dig('candidates', 0, 'content', 'parts')
|
74
|
+
chunk_text = extract_text_from_parts(chunk.dig('candidates', 0, 'content', 'parts'))
|
75
|
+
content += chunk_text
|
76
|
+
|
77
|
+
# Call block only if provided (for real streaming)
|
78
|
+
block.call(chunk) if block_given?
|
79
|
+
end
|
80
|
+
|
81
|
+
# Store final response data (usage, metadata) from last chunk
|
82
|
+
if chunk['usageMetadata'] || chunk.dig('candidates', 0, 'finishReason')
|
83
|
+
final_response_data = chunk
|
84
|
+
end
|
67
85
|
end
|
86
|
+
else
|
87
|
+
# Non-streaming mode (for VCR tests)
|
88
|
+
response = @client.generate_content(request_params)
|
68
89
|
|
69
|
-
#
|
70
|
-
if
|
71
|
-
|
90
|
+
# Extract content from single response
|
91
|
+
if response.dig('candidates', 0, 'content', 'parts')
|
92
|
+
content = extract_text_from_parts(response.dig('candidates', 0, 'content', 'parts'))
|
72
93
|
end
|
94
|
+
|
95
|
+
# Use response as final data
|
96
|
+
final_response_data = response
|
73
97
|
end
|
74
98
|
|
75
99
|
# Extract usage information from final chunk
|
@@ -1,7 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "sorbet-runtime"
|
4
|
-
require_relative "../../cache_manager"
|
5
4
|
|
6
5
|
module DSPy
|
7
6
|
class LM
|
@@ -22,22 +21,12 @@ module DSPy
|
|
22
21
|
|
23
22
|
sig { params(signature_class: T.class_of(DSPy::Signature), name: T.nilable(String), strict: T::Boolean).returns(T::Hash[Symbol, T.untyped]) }
|
24
23
|
def self.to_openai_format(signature_class, name: nil, strict: true)
|
25
|
-
# Build cache params from the method parameters
|
26
|
-
cache_params = { strict: strict }
|
27
|
-
cache_params[:name] = name if name
|
28
|
-
|
29
|
-
# Check cache first
|
30
|
-
cache_manager = DSPy::LM.cache_manager
|
31
|
-
cached_schema = cache_manager.get_schema(signature_class, "openai", cache_params)
|
32
|
-
|
33
|
-
if cached_schema
|
34
|
-
DSPy.logger.debug("Using cached schema for #{signature_class.name}")
|
35
|
-
return cached_schema
|
36
|
-
end
|
37
|
-
|
38
24
|
# Get the output JSON schema from the signature class
|
39
25
|
output_schema = signature_class.output_json_schema
|
40
26
|
|
27
|
+
# Convert oneOf to anyOf where safe, or raise error for unsupported cases
|
28
|
+
output_schema = convert_oneof_to_anyof_if_safe(output_schema)
|
29
|
+
|
41
30
|
# Build the complete schema with OpenAI-specific modifications
|
42
31
|
dspy_schema = {
|
43
32
|
"$schema": "http://json-schema.org/draft-06/schema#",
|
@@ -59,7 +48,7 @@ module DSPy
|
|
59
48
|
end
|
60
49
|
|
61
50
|
# Wrap in OpenAI's required format
|
62
|
-
|
51
|
+
{
|
63
52
|
type: "json_schema",
|
64
53
|
json_schema: {
|
65
54
|
name: schema_name,
|
@@ -67,34 +56,75 @@ module DSPy
|
|
67
56
|
schema: openai_schema
|
68
57
|
}
|
69
58
|
}
|
59
|
+
end
|
60
|
+
|
61
|
+
# Convert oneOf to anyOf if safe (discriminated unions), otherwise raise error
|
62
|
+
sig { params(schema: T.untyped).returns(T.untyped) }
|
63
|
+
def self.convert_oneof_to_anyof_if_safe(schema)
|
64
|
+
return schema unless schema.is_a?(Hash)
|
65
|
+
|
66
|
+
result = schema.dup
|
67
|
+
|
68
|
+
# Check if this schema has oneOf that we can safely convert
|
69
|
+
if result[:oneOf]
|
70
|
+
if all_have_discriminators?(result[:oneOf])
|
71
|
+
# Safe to convert - discriminators ensure mutual exclusivity
|
72
|
+
result[:anyOf] = result.delete(:oneOf).map { |s| convert_oneof_to_anyof_if_safe(s) }
|
73
|
+
else
|
74
|
+
# Unsafe conversion - raise error
|
75
|
+
raise DSPy::UnsupportedSchemaError.new(
|
76
|
+
"OpenAI structured outputs do not support oneOf schemas without discriminator fields. " \
|
77
|
+
"The schema contains union types that cannot be safely converted to anyOf. " \
|
78
|
+
"Please use enhanced_prompting strategy instead or add discriminator fields to union types."
|
79
|
+
)
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
# Recursively process nested schemas
|
84
|
+
if result[:properties].is_a?(Hash)
|
85
|
+
result[:properties] = result[:properties].transform_values { |v| convert_oneof_to_anyof_if_safe(v) }
|
86
|
+
end
|
70
87
|
|
71
|
-
|
72
|
-
|
88
|
+
if result[:items].is_a?(Hash)
|
89
|
+
result[:items] = convert_oneof_to_anyof_if_safe(result[:items])
|
90
|
+
end
|
91
|
+
|
92
|
+
# Process arrays of schema items
|
93
|
+
if result[:items].is_a?(Array)
|
94
|
+
result[:items] = result[:items].map { |item|
|
95
|
+
item.is_a?(Hash) ? convert_oneof_to_anyof_if_safe(item) : item
|
96
|
+
}
|
97
|
+
end
|
98
|
+
|
99
|
+
# Process anyOf arrays (in case there are nested oneOf within anyOf)
|
100
|
+
if result[:anyOf].is_a?(Array)
|
101
|
+
result[:anyOf] = result[:anyOf].map { |item|
|
102
|
+
item.is_a?(Hash) ? convert_oneof_to_anyof_if_safe(item) : item
|
103
|
+
}
|
104
|
+
end
|
73
105
|
|
74
106
|
result
|
75
107
|
end
|
108
|
+
|
109
|
+
# Check if all schemas in a oneOf array have discriminator fields (const properties)
|
110
|
+
sig { params(schemas: T::Array[T.untyped]).returns(T::Boolean) }
|
111
|
+
def self.all_have_discriminators?(schemas)
|
112
|
+
schemas.all? do |schema|
|
113
|
+
next false unless schema.is_a?(Hash)
|
114
|
+
next false unless schema[:properties].is_a?(Hash)
|
115
|
+
|
116
|
+
# Check if any property has a const value (our discriminator pattern)
|
117
|
+
schema[:properties].any? { |_, prop| prop.is_a?(Hash) && prop[:const] }
|
118
|
+
end
|
119
|
+
end
|
76
120
|
|
77
121
|
sig { params(model: String).returns(T::Boolean) }
|
78
122
|
def self.supports_structured_outputs?(model)
|
79
|
-
# Check cache first
|
80
|
-
cache_manager = DSPy::LM.cache_manager
|
81
|
-
cached_result = cache_manager.get_capability(model, "structured_outputs")
|
82
|
-
|
83
|
-
if !cached_result.nil?
|
84
|
-
DSPy.logger.debug("Using cached capability check for #{model}")
|
85
|
-
return cached_result
|
86
|
-
end
|
87
|
-
|
88
123
|
# Extract base model name without provider prefix
|
89
124
|
base_model = model.sub(/^openai\//, "")
|
90
125
|
|
91
126
|
# Check if it's a supported model or a newer version
|
92
|
-
|
93
|
-
|
94
|
-
# Cache the result
|
95
|
-
cache_manager.cache_capability(model, "structured_outputs", result)
|
96
|
-
|
97
|
-
result
|
127
|
+
STRUCTURED_OUTPUT_MODELS.any? { |supported| base_model.start_with?(supported) }
|
98
128
|
end
|
99
129
|
|
100
130
|
sig { params(schema: T::Hash[Symbol, T.untyped]).returns(T::Array[String]) }
|
@@ -226,8 +256,8 @@ module DSPy
|
|
226
256
|
end
|
227
257
|
end
|
228
258
|
|
229
|
-
# Process
|
230
|
-
[:
|
259
|
+
# Process anyOf/allOf (oneOf should be converted to anyOf by this point)
|
260
|
+
[:anyOf, :allOf].each do |key|
|
231
261
|
if result[key].is_a?(Array)
|
232
262
|
result[key] = result[key].map do |sub_schema|
|
233
263
|
sub_schema.is_a?(Hash) ? add_additional_properties_recursively(sub_schema) : sub_schema
|
@@ -272,8 +302,8 @@ module DSPy
|
|
272
302
|
max_depth = [max_depth, items_depth].max
|
273
303
|
end
|
274
304
|
|
275
|
-
# Check
|
276
|
-
[:
|
305
|
+
# Check anyOf/allOf (oneOf should be converted to anyOf by this point)
|
306
|
+
[:anyOf, :allOf].each do |key|
|
277
307
|
if schema[key].is_a?(Array)
|
278
308
|
schema[key].each do |sub_schema|
|
279
309
|
if sub_schema.is_a?(Hash)
|
@@ -291,8 +321,8 @@ module DSPy
|
|
291
321
|
def self.contains_pattern_properties?(schema)
|
292
322
|
return true if schema[:patternProperties]
|
293
323
|
|
294
|
-
# Recursively check nested schemas
|
295
|
-
[:properties, :items, :
|
324
|
+
# Recursively check nested schemas (oneOf should be converted to anyOf by this point)
|
325
|
+
[:properties, :items, :anyOf, :allOf].each do |key|
|
296
326
|
value = schema[key]
|
297
327
|
case value
|
298
328
|
when Hash
|
@@ -309,8 +339,8 @@ module DSPy
|
|
309
339
|
def self.contains_conditional_schemas?(schema)
|
310
340
|
return true if schema[:if] || schema[:then] || schema[:else]
|
311
341
|
|
312
|
-
# Recursively check nested schemas
|
313
|
-
[:properties, :items, :
|
342
|
+
# Recursively check nested schemas (oneOf should be converted to anyOf by this point)
|
343
|
+
[:properties, :items, :anyOf, :allOf].each do |key|
|
314
344
|
value = schema[key]
|
315
345
|
case value
|
316
346
|
when Hash
|