dspy 0.34.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +9 -16
- data/lib/dspy/context.rb +53 -20
- data/lib/dspy/document.rb +153 -0
- data/lib/dspy/lm/adapter.rb +23 -0
- data/lib/dspy/lm/errors.rb +7 -2
- data/lib/dspy/lm/json_strategy.rb +87 -124
- data/lib/dspy/lm/message.rb +5 -1
- data/lib/dspy/lm/message_builder.rb +15 -1
- data/lib/dspy/lm/response.rb +1 -1
- data/lib/dspy/lm/usage.rb +31 -6
- data/lib/dspy/lm.rb +81 -8
- data/lib/dspy/mixins/type_coercion.rb +76 -14
- data/lib/dspy/module.rb +133 -6
- data/lib/dspy/predict.rb +1 -1
- data/lib/dspy/prediction.rb +10 -1
- data/lib/dspy/prompt.rb +2 -46
- data/lib/dspy/re_act.rb +159 -34
- data/lib/dspy/ruby_llm/lm/adapters/ruby_llm_adapter.rb +13 -3
- data/lib/dspy/ruby_llm/version.rb +1 -1
- data/lib/dspy/ruby_llm.rb +0 -3
- data/lib/dspy/signature.rb +4 -5
- data/lib/dspy/structured_outputs_prompt.rb +1 -1
- data/lib/dspy/support/openai_sdk_warning.rb +32 -0
- data/lib/dspy/utils/serialization.rb +2 -6
- data/lib/dspy/version.rb +1 -1
- data/lib/dspy.rb +52 -17
- metadata +9 -2
- data/lib/dspy/ruby_llm/guardrails.rb +0 -24
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 85ae2299dbddc20bfd710c68e9e8dfbeeb201b742e3fd18b768669ddc4443261
|
|
4
|
+
data.tar.gz: ba0ee2d5f637f499448e456fb58f0513aa58b5b258e754bf1b32839ea8c49b63
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 310de5ce11b23d29bd168069eae4f5ce3ac154ba91974c96b9d70e5d02a0dcb45122dc4c83f80097a409ff448a0a30d45ebec3ff0883d80b2cdd8f1215d2dfff
|
|
7
|
+
data.tar.gz: 1ce5ff1bfe900bcedabab28efba1e8352fed81c6cbf458d5b7c54e8f7eea29fa6b7ee33a62dcc73b456ede6181a85bbf055541e4fb70037ac4de05e3be2b8ce9
|
data/README.md
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
**Build reliable LLM applications in idiomatic Ruby using composable, type-safe modules.**
|
|
10
10
|
|
|
11
11
|
DSPy.rb is the Ruby port of Stanford's [DSPy](https://dspy.ai). Instead of wrestling with brittle prompt strings, you define typed signatures and let the framework handle the rest. Prompts become functions. LLM calls become predictable.
|
|
12
|
+
The `1.x` line is the stable release track for production Ruby LLM applications.
|
|
12
13
|
|
|
13
14
|
```ruby
|
|
14
15
|
require 'dspy'
|
|
@@ -137,26 +138,18 @@ result.answer # => "60 km/h"
|
|
|
137
138
|
Build agents that use tools to accomplish tasks:
|
|
138
139
|
|
|
139
140
|
```ruby
|
|
140
|
-
class SearchTool < DSPy::Tools::
|
|
141
|
+
class SearchTool < DSPy::Tools::Base
|
|
141
142
|
tool_name "search"
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
input do
|
|
145
|
-
const :query, String
|
|
146
|
-
end
|
|
147
|
-
|
|
148
|
-
output do
|
|
149
|
-
const :results, T::Array[String]
|
|
150
|
-
end
|
|
143
|
+
tool_description "Search for information"
|
|
151
144
|
|
|
145
|
+
sig { params(query: String).returns(String) }
|
|
152
146
|
def call(query:)
|
|
153
147
|
# Your search implementation
|
|
154
|
-
|
|
148
|
+
"Result 1, Result 2"
|
|
155
149
|
end
|
|
156
150
|
end
|
|
157
151
|
|
|
158
|
-
|
|
159
|
-
agent = DSPy::ReAct.new(signature: ResearchTask, tools: toolset, max_iterations: 5)
|
|
152
|
+
agent = DSPy::ReAct.new(ResearchTask, tools: [SearchTool.new], max_iterations: 5)
|
|
160
153
|
result = agent.call(question: "What's the latest on Ruby 3.4?")
|
|
161
154
|
```
|
|
162
155
|
|
|
@@ -185,8 +178,8 @@ result = agent.call(question: "What's the latest on Ruby 3.4?")
|
|
|
185
178
|
A [Claude Skill](https://github.com/vicentereig/dspy-rb-skill) is available to help you build DSPy.rb applications:
|
|
186
179
|
|
|
187
180
|
```bash
|
|
188
|
-
# Claude Code
|
|
189
|
-
|
|
181
|
+
# Claude Code — install from the vicentereig/engineering marketplace
|
|
182
|
+
claude install-skill vicentereig/engineering --skill dspy-rb
|
|
190
183
|
```
|
|
191
184
|
|
|
192
185
|
For Claude.ai Pro/Max, download the [skill ZIP](https://github.com/vicentereig/dspy-rb-skill/archive/refs/heads/main.zip) and upload via Settings > Skills.
|
|
@@ -201,7 +194,7 @@ The [examples/](examples/) directory has runnable code for common patterns:
|
|
|
201
194
|
- Prompt optimization
|
|
202
195
|
|
|
203
196
|
```bash
|
|
204
|
-
bundle exec ruby examples/
|
|
197
|
+
bundle exec ruby examples/basic_search_agent.rb
|
|
205
198
|
```
|
|
206
199
|
|
|
207
200
|
## Optional Gems
|
data/lib/dspy/context.rb
CHANGED
|
@@ -74,8 +74,9 @@ module DSPy
|
|
|
74
74
|
# Prepare attributes and add trace name for root spans
|
|
75
75
|
span_attributes = sanitized_attributes.transform_keys(&:to_s).reject { |k, v| v.nil? }
|
|
76
76
|
|
|
77
|
-
# Set trace name if this is likely a root span (no parent in our stack)
|
|
78
|
-
|
|
77
|
+
# Set trace name if this is likely a root span (no parent in our stack),
|
|
78
|
+
# unless callers already specified one explicitly.
|
|
79
|
+
if current[:span_stack].length == 1 && !span_attributes.key?('langfuse.trace.name')
|
|
79
80
|
span_attributes['langfuse.trace.name'] = operation
|
|
80
81
|
end
|
|
81
82
|
|
|
@@ -84,6 +85,12 @@ module DSPy
|
|
|
84
85
|
|
|
85
86
|
# Get parent OpenTelemetry span for proper context propagation
|
|
86
87
|
parent_otel_span = current[:otel_span_stack].last
|
|
88
|
+
if !parent_otel_span && defined?(OpenTelemetry::Trace)
|
|
89
|
+
current_span = OpenTelemetry::Trace.current_span
|
|
90
|
+
if current_span && current_span != OpenTelemetry::Trace::Span::INVALID
|
|
91
|
+
parent_otel_span = current_span
|
|
92
|
+
end
|
|
93
|
+
end
|
|
87
94
|
|
|
88
95
|
# Create span with proper parent context
|
|
89
96
|
if parent_otel_span
|
|
@@ -96,20 +103,18 @@ module DSPy
|
|
|
96
103
|
) do |span|
|
|
97
104
|
# Add to our OpenTelemetry span stack
|
|
98
105
|
current[:otel_span_stack].push(span)
|
|
106
|
+
succeeded = false
|
|
99
107
|
|
|
100
108
|
begin
|
|
101
109
|
result = yield(span)
|
|
102
|
-
|
|
103
|
-
# Add explicit timing information to help Langfuse
|
|
104
|
-
if span
|
|
105
|
-
duration_ms = ((Time.now - otel_start_time) * 1000).round(3)
|
|
106
|
-
span.set_attribute('duration.ms', duration_ms)
|
|
107
|
-
span.set_attribute('langfuse.observation.startTime', otel_start_time.iso8601(3))
|
|
108
|
-
span.set_attribute('langfuse.observation.endTime', Time.now.iso8601(3))
|
|
109
|
-
end
|
|
110
|
-
|
|
110
|
+
succeeded = true
|
|
111
111
|
result
|
|
112
|
+
rescue StandardError => e
|
|
113
|
+
set_span_error_attributes(span, e)
|
|
114
|
+
raise
|
|
112
115
|
ensure
|
|
116
|
+
set_span_status_attribute(span, succeeded)
|
|
117
|
+
set_span_timing_attributes(span, otel_start_time)
|
|
113
118
|
# Remove from our OpenTelemetry span stack
|
|
114
119
|
current[:otel_span_stack].pop
|
|
115
120
|
end
|
|
@@ -124,20 +129,18 @@ module DSPy
|
|
|
124
129
|
) do |span|
|
|
125
130
|
# Add to our OpenTelemetry span stack
|
|
126
131
|
current[:otel_span_stack].push(span)
|
|
132
|
+
succeeded = false
|
|
127
133
|
|
|
128
134
|
begin
|
|
129
135
|
result = yield(span)
|
|
130
|
-
|
|
131
|
-
# Add explicit timing information to help Langfuse
|
|
132
|
-
if span
|
|
133
|
-
duration_ms = ((Time.now - otel_start_time) * 1000).round(3)
|
|
134
|
-
span.set_attribute('duration.ms', duration_ms)
|
|
135
|
-
span.set_attribute('langfuse.observation.startTime', otel_start_time.iso8601(3))
|
|
136
|
-
span.set_attribute('langfuse.observation.endTime', Time.now.iso8601(3))
|
|
137
|
-
end
|
|
138
|
-
|
|
136
|
+
succeeded = true
|
|
139
137
|
result
|
|
138
|
+
rescue StandardError => e
|
|
139
|
+
set_span_error_attributes(span, e)
|
|
140
|
+
raise
|
|
140
141
|
ensure
|
|
142
|
+
set_span_status_attribute(span, succeeded)
|
|
143
|
+
set_span_timing_attributes(span, otel_start_time)
|
|
141
144
|
# Remove from our OpenTelemetry span stack
|
|
142
145
|
current[:otel_span_stack].pop
|
|
143
146
|
end
|
|
@@ -296,6 +299,36 @@ module DSPy
|
|
|
296
299
|
label: explicit_label || (module_instance.respond_to?(:module_scope_label) ? module_instance.module_scope_label : nil)
|
|
297
300
|
}
|
|
298
301
|
end
|
|
302
|
+
|
|
303
|
+
# Records wall-clock timing on a span: total duration in milliseconds plus
# ISO-8601 (millisecond precision) start and end timestamps.
#
# Best-effort: any StandardError raised while writing attributes is swallowed
# and nil is returned, so timing bookkeeping never masks the caller's result.
#
# @param span [#set_attribute, nil] span to annotate; a falsy span is a no-op
# @param otel_start_time [Time] moment the span was started
def set_span_timing_attributes(span, otel_start_time)
  if span
    finished_at = Time.now
    elapsed_ms = ((finished_at - otel_start_time) * 1000).round(3)

    span.set_attribute('duration.ms', elapsed_ms)
    span.set_attribute('langfuse.observation.startTime', otel_start_time.iso8601(3))
    span.set_attribute('langfuse.observation.endTime', finished_at.iso8601(3))
  end
rescue StandardError
  nil
end
|
|
314
|
+
|
|
315
|
+
# Marks a span as failed and records the exception class and message.
#
# The message is truncated to 2000 characters. Best-effort: any StandardError
# raised while writing attributes is swallowed and nil is returned, so the
# original exception being reported is never replaced by a telemetry failure.
#
# @param span [#set_attribute, nil] span to annotate; a falsy span is a no-op
# @param error [Exception] the exception that interrupted the span
def set_span_error_attributes(span, error)
  if span
    span.set_attribute('error', true)
    span.set_attribute('error.type', error.class.name)
    span.set_attribute('error.message', error.message.to_s.slice(0, 2000)) if error.message
  end
rescue StandardError
  nil
end
|
|
324
|
+
|
|
325
|
+
# Writes the final outcome of a span into the 'dspy.status' attribute:
# 'completed' when the wrapped block finished, 'error' otherwise.
#
# Best-effort: any StandardError raised by the span is swallowed and nil is
# returned.
#
# @param span [#set_attribute, nil] span to annotate; a falsy span is a no-op
# @param succeeded [Boolean] whether the wrapped block completed without raising
def set_span_status_attribute(span, succeeded)
  if span
    outcome = succeeded ? 'completed' : 'error'
    span.set_attribute('dspy.status', outcome)
  end
rescue StandardError
  nil
end
|
|
299
332
|
end
|
|
300
333
|
end
|
|
301
334
|
end
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'base64'
|
|
4
|
+
require 'stringio'
|
|
5
|
+
require 'uri'
|
|
6
|
+
|
|
7
|
+
module DSPy
  # A PDF document that can be attached to an LLM message.
  #
  # Exactly one source must be given: a remote +url+, a +base64+ string, or
  # +data+ as an array of byte values. Only 'application/pdf' is accepted, and
  # inline content (base64 / data) is limited to MAX_SIZE_BYTES.
  class Document
    # In-memory IO handed to RubyLLM for inline documents. It carries a +path+
    # alongside the bytes.
    # NOTE(review): presumably RubyLLM reads #path to derive a filename or
    # MIME type for the attachment - confirm against the RubyLLM adapter.
    class RubyLLMInlineAttachment < StringIO
      attr_reader :path

      def initialize(content, path:)
        super(content)
        @path = path
        binmode
      end
    end

    private_constant :RubyLLMInlineAttachment

    attr_reader :url, :base64, :data, :content_type

    SUPPORTED_FORMATS = %w[application/pdf].freeze
    MAX_SIZE_BYTES = 32 * 1024 * 1024 # 32MB limit

    # Shared by the format converters and #validate_for_provider! so the two
    # code paths cannot drift apart.
    OPENAI_UNSUPPORTED_MESSAGE =
      "OpenAI document inputs are not supported in this release. Use Anthropic directly or Anthropic via RubyLLM."
    GEMINI_UNSUPPORTED_MESSAGE =
      "Gemini document inputs are not supported in this release. Use Anthropic directly or Anthropic via RubyLLM."

    private_constant :OPENAI_UNSUPPORTED_MESSAGE, :GEMINI_UNSUPPORTED_MESSAGE

    # @param url [String, nil] remote location of the PDF
    # @param base64 [String, nil] Base64-encoded PDF bytes
    # @param data [Array<Integer>, nil] raw PDF bytes (packed with 'C*')
    # @param content_type [String, nil] MIME type; required for base64/data,
    #   inferred from the URL extension when omitted for url
    # @raise [ArgumentError] when zero or several sources are given, the
    #   content type is missing or unsupported, the URL is malformed or does
    #   not end in .pdf, or inline content exceeds MAX_SIZE_BYTES
    def initialize(url: nil, base64: nil, data: nil, content_type: nil)
      validate_input!(url, base64, data)

      if url
        @url = url
        @content_type = content_type || infer_content_type_from_url(url)
      elsif base64
        raise ArgumentError, "content_type is required when using base64" unless content_type

        @base64 = base64
        @content_type = content_type
        validate_size!(Base64.decode64(base64).bytesize)
      elsif data
        raise ArgumentError, "content_type is required when using data" unless content_type

        @data = data
        @content_type = content_type
        validate_size!(data.size)
      end

      validate_content_type!
    end

    # @raise [DSPy::LM::IncompatibleDocumentFeatureError] always
    def to_openai_format
      raise DSPy::LM::IncompatibleDocumentFeatureError, OPENAI_UNSUPPORTED_MESSAGE
    end

    # Builds the Anthropic 'document' content block: a URL source for remote
    # documents, otherwise a base64 source carrying the media type.
    #
    # @return [Hash]
    def to_anthropic_format
      source =
        if url
          { type: 'url', url: url }
        else
          { type: 'base64', media_type: content_type, data: to_base64 }
        end

      { type: 'document', source: source }
    end

    # @raise [DSPy::LM::IncompatibleDocumentFeatureError] always
    def to_gemini_format
      raise DSPy::LM::IncompatibleDocumentFeatureError, GEMINI_UNSUPPORTED_MESSAGE
    end

    # @return [String, StringIO] the URL itself for remote documents, otherwise
    #   an in-memory IO over the decoded bytes with path 'document.pdf'
    def to_ruby_llm_attachment
      return url if url

      RubyLLMInlineAttachment.new(to_binary, path: 'document.pdf')
    end

    # @return [String, nil] Base64 payload, or nil for URL-only documents
    def to_base64
      return base64 if base64
      return Base64.strict_encode64(data.pack('C*')) if data

      nil
    end

    # @param provider [String] provider name ('anthropic', 'openai', 'gemini', ...)
    # @return [true] when the provider accepts document inputs
    # @raise [DSPy::LM::IncompatibleDocumentFeatureError] for every other provider
    def validate_for_provider!(provider)
      case provider
      when 'anthropic'
        true
      when 'openai'
        raise DSPy::LM::IncompatibleDocumentFeatureError, OPENAI_UNSUPPORTED_MESSAGE
      when 'gemini'
        raise DSPy::LM::IncompatibleDocumentFeatureError, GEMINI_UNSUPPORTED_MESSAGE
      else
        raise DSPy::LM::IncompatibleDocumentFeatureError,
              "Unknown provider '#{provider}'. Document inputs are currently supported only for Anthropic."
      end
    end

    private

    # Ensures exactly one of the three sources was supplied.
    def validate_input!(url, base64, data)
      inputs = [url, base64, data].compact

      if inputs.empty?
        raise ArgumentError, "Must provide either url, base64, or data"
      elsif inputs.size > 1
        raise ArgumentError, "Only one of url, base64, or data can be provided"
      end
    end

    def validate_content_type!
      return if SUPPORTED_FORMATS.include?(content_type)

      raise ArgumentError, "Unsupported document format: #{content_type}. Supported formats: #{SUPPORTED_FORMATS.join(', ')}"
    end

    def validate_size!(size_bytes)
      return unless size_bytes > MAX_SIZE_BYTES

      raise ArgumentError, "Document size exceeds 32MB limit (got #{size_bytes} bytes)"
    end

    # Derives the MIME type from the URL path extension (case-insensitive;
    # query string and fragment are ignored because only the path is examined).
    #
    # @raise [ArgumentError] when the URL cannot be parsed or its path does not
    #   end in .pdf
    def infer_content_type_from_url(url)
      path =
        begin
          URI.parse(url).path
        rescue URI::Error
          # Surface malformed URLs as ArgumentError, like every other
          # constructor validation, instead of leaking a URI-specific error.
          raise ArgumentError, "Invalid document URL: #{url}"
        end

      # Opaque URIs (e.g. mailto:) have a nil path; File.extname(nil) would
      # raise TypeError, so coerce to an empty string and reject it below.
      extension = File.extname(path.to_s).downcase

      case extension
      when '.pdf'
        'application/pdf'
      else
        raise ArgumentError, "Document URL must point to a PDF (.pdf): #{url}"
      end
    end

    # @return [String] decoded document bytes
    # @raise [ArgumentError] when the document only has a URL
    def to_binary
      return Base64.decode64(base64) if base64
      return data.pack('C*') if data

      raise ArgumentError, "Document has no binary content"
    end
  end
end
|
data/lib/dspy/lm/adapter.rb
CHANGED
|
@@ -58,6 +58,17 @@ module DSPy
|
|
|
58
58
|
end
|
|
59
59
|
end
|
|
60
60
|
|
|
61
|
+
# Reports whether any message carries a multimodal 'document' content item.
#
# Messages may be hashes (content under :content) or message objects that
# expose #content. A message whose content is nil, absent, or not an Array
# (e.g. plain text) simply does not count; it no longer raises NoMethodError.
#
# @param messages [Array<Hash, #content>] messages to inspect
# @return [Boolean]
def contains_documents?(messages)
  messages.any? do |msg|
    # Only index with [] when the message supports it; plain message objects
    # expose #content but not #[].
    content = msg[:content] if msg.respond_to?(:[])
    # Only fall back to #content when it exists; a Hash with nil content
    # has no such method.
    content ||= msg.content if msg.respond_to?(:content)

    content.is_a?(Array) && content.any? { |item| item[:type] == 'document' }
  end
end
|
|
67
|
+
|
|
68
|
+
# Reports whether any message carries non-text content, i.e. an image or a
# document item. The document check only runs when no image was found.
#
# @param messages [Array] messages to inspect
def contains_media?(messages)
  has_images = contains_images?(messages)
  has_images || contains_documents?(messages)
end
|
|
71
|
+
|
|
61
72
|
# Format multimodal messages for a specific provider
|
|
62
73
|
# @param messages [Array<Hash>] Array of message hashes
|
|
63
74
|
# @param provider_name [String] Provider name for image validation and formatting
|
|
@@ -71,6 +82,8 @@ module DSPy
|
|
|
71
82
|
{ type: 'text', text: item[:text] }
|
|
72
83
|
when 'image'
|
|
73
84
|
format_image_for_provider(item[:image], provider_name)
|
|
85
|
+
when 'document'
|
|
86
|
+
format_document_for_provider(item[:document], provider_name)
|
|
74
87
|
else
|
|
75
88
|
item
|
|
76
89
|
end
|
|
@@ -96,6 +109,16 @@ module DSPy
|
|
|
96
109
|
{ type: 'image', image: image }
|
|
97
110
|
end
|
|
98
111
|
end
|
|
112
|
+
|
|
113
|
+
# Converts a document into the content block expected by one provider.
#
# The document is first asked to validate itself for the provider (which may
# raise). If it exposes a public `to_<provider>_format` converter, that
# result is returned; otherwise a generic document item wraps it unchanged.
#
# @param document [#validate_for_provider!] document to format
# @param provider_name [String] provider name used to pick the converter
def format_document_for_provider(document, provider_name)
  document.validate_for_provider!(provider_name)

  converter = "to_#{provider_name}_format"
  return { type: 'document', document: document } unless document.respond_to?(converter)

  # respond_to? only reports public methods, so public dispatch is sufficient.
  document.public_send(converter)
end
|
|
99
122
|
end
|
|
100
123
|
end
|
|
101
124
|
end
|
data/lib/dspy/lm/errors.rb
CHANGED
|
@@ -7,8 +7,6 @@ module DSPy
|
|
|
7
7
|
class UnsupportedProviderError < Error; end
|
|
8
8
|
class ConfigurationError < Error; end
|
|
9
9
|
class MissingAdapterError < Error; end
|
|
10
|
-
class UnsupportedVersionError < Error; end
|
|
11
|
-
class MissingOfficialSDKError < Error; end
|
|
12
10
|
|
|
13
11
|
# Raised when API key is missing or invalid
|
|
14
12
|
class MissingAPIKeyError < Error
|
|
@@ -29,5 +27,12 @@ module DSPy
|
|
|
29
27
|
super(message)
|
|
30
28
|
end
|
|
31
29
|
end
|
|
30
|
+
|
|
31
|
+
# Raised when a document input cannot be used with the target provider.
class IncompatibleDocumentFeatureError < AdapterError
  # @param message [String] explanation of the incompatibility (required)
  def initialize(message)
    # Bare super forwards the single argument unchanged.
    super
  end
end
|
|
32
37
|
end
|
|
33
38
|
end
|
|
@@ -38,17 +38,8 @@ module DSPy
|
|
|
38
38
|
# OpenAI/Ollama: try to extract JSON from various formats
|
|
39
39
|
extract_json_from_content(response.content)
|
|
40
40
|
elsif adapter_class_name.include?('AnthropicAdapter')
|
|
41
|
-
# Anthropic:
|
|
42
|
-
|
|
43
|
-
structured_outputs_enabled = true if structured_outputs_enabled.nil? # Default to true
|
|
44
|
-
|
|
45
|
-
if structured_outputs_enabled
|
|
46
|
-
extracted = extract_anthropic_tool_json(response)
|
|
47
|
-
extracted || extract_json_from_content(response.content)
|
|
48
|
-
else
|
|
49
|
-
# Skip tool extraction, use enhanced prompting extraction
|
|
50
|
-
extract_json_from_content(response.content)
|
|
51
|
-
end
|
|
41
|
+
# Anthropic: Beta API returns JSON in content, same as OpenAI/Gemini
|
|
42
|
+
extract_json_from_content(response.content)
|
|
52
43
|
elsif adapter_class_name.include?('GeminiAdapter')
|
|
53
44
|
# Gemini: try to extract JSON from various formats
|
|
54
45
|
extract_json_from_content(response.content)
|
|
@@ -90,25 +81,30 @@ module DSPy
|
|
|
90
81
|
# Anthropic preparation
|
|
91
82
|
sig { params(messages: T::Array[T::Hash[Symbol, T.untyped]], request_params: T::Hash[Symbol, T.untyped]).void }
|
|
92
83
|
def prepare_anthropic_request(messages, request_params)
|
|
93
|
-
|
|
94
|
-
|
|
84
|
+
begin
|
|
85
|
+
require "dspy/anthropic/lm/schema_converter"
|
|
86
|
+
rescue LoadError
|
|
87
|
+
msg = <<~MSG
|
|
88
|
+
Anthropic adapter is optional; structured output helpers will be unavailable until the gem is installed.
|
|
89
|
+
Add `gem 'dspy-anthropic'` to your Gemfile and run `bundle install`.
|
|
90
|
+
MSG
|
|
91
|
+
raise DSPy::LM::MissingAdapterError, msg
|
|
92
|
+
end
|
|
95
93
|
|
|
96
|
-
#
|
|
94
|
+
# Only use Beta API structured outputs if enabled (default: true)
|
|
95
|
+
structured_outputs_enabled = adapter.instance_variable_get(:@structured_outputs_enabled)
|
|
97
96
|
structured_outputs_enabled = true if structured_outputs_enabled.nil?
|
|
98
97
|
|
|
99
98
|
return unless structured_outputs_enabled
|
|
100
99
|
|
|
101
|
-
#
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
# Add tool definition
|
|
105
|
-
request_params[:tools] = [tool_schema]
|
|
100
|
+
# Use Anthropic Beta API structured outputs
|
|
101
|
+
schema = DSPy::Anthropic::LM::SchemaConverter.to_beta_format(signature_class)
|
|
106
102
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
103
|
+
request_params[:output_format] = ::Anthropic::Models::Beta::BetaJSONOutputFormat.new(
|
|
104
|
+
type: :json_schema,
|
|
105
|
+
schema: schema
|
|
106
|
+
)
|
|
107
|
+
request_params[:betas] = ["structured-outputs-2025-11-13"]
|
|
112
108
|
end
|
|
113
109
|
|
|
114
110
|
# Gemini preparation
|
|
@@ -135,84 +131,6 @@ module DSPy
|
|
|
135
131
|
end
|
|
136
132
|
end
|
|
137
133
|
|
|
138
|
-
# Convert signature to Anthropic tool schema
|
|
139
|
-
# Uses strict: true for constrained decoding (Anthropic structured outputs)
|
|
140
|
-
# Anthropic strict mode requires ALL properties in required at every level.
|
|
141
|
-
sig { returns(T::Hash[Symbol, T.untyped]) }
|
|
142
|
-
def convert_to_anthropic_tool_schema
|
|
143
|
-
output_fields = signature_class.output_field_descriptors
|
|
144
|
-
|
|
145
|
-
schema = {
|
|
146
|
-
name: "json_output",
|
|
147
|
-
description: "Output the result in the required JSON format",
|
|
148
|
-
strict: true,
|
|
149
|
-
input_schema: {
|
|
150
|
-
type: "object",
|
|
151
|
-
properties: build_properties_from_fields(output_fields),
|
|
152
|
-
required: build_required_from_fields(output_fields),
|
|
153
|
-
additionalProperties: false
|
|
154
|
-
}
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
# Anthropic strict mode: ALL properties must be in required at every level.
|
|
158
|
-
# Non-required properties get auto-wrapped in null unions by the grammar compiler,
|
|
159
|
-
# which counts against the 16-union-parameter limit.
|
|
160
|
-
enforce_all_required(schema[:input_schema])
|
|
161
|
-
|
|
162
|
-
schema
|
|
163
|
-
end
|
|
164
|
-
|
|
165
|
-
# Build required field list, excluding fields that have defaults
|
|
166
|
-
sig { params(fields: T::Hash[Symbol, T.untyped]).returns(T::Array[String]) }
|
|
167
|
-
def build_required_from_fields(fields)
|
|
168
|
-
fields.reject { |_name, descriptor| descriptor.has_default }.keys.map(&:to_s)
|
|
169
|
-
end
|
|
170
|
-
|
|
171
|
-
# Recursively enforce that all properties are in required and
|
|
172
|
-
# additionalProperties is false, as required by Anthropic strict mode.
|
|
173
|
-
sig { params(schema: T::Hash[Symbol, T.untyped]).void }
|
|
174
|
-
def enforce_all_required(schema)
|
|
175
|
-
return unless schema.is_a?(Hash)
|
|
176
|
-
|
|
177
|
-
if schema[:type] == "object" && schema[:properties]
|
|
178
|
-
schema[:required] = schema[:properties].keys.map(&:to_s)
|
|
179
|
-
schema[:additionalProperties] = false
|
|
180
|
-
schema[:properties].each_value { |v| enforce_all_required(v) }
|
|
181
|
-
elsif schema[:type] == "array" && schema[:items]
|
|
182
|
-
enforce_all_required(schema[:items])
|
|
183
|
-
elsif schema[:type].is_a?(Array)
|
|
184
|
-
# type: ["array", "null"] — check items if present
|
|
185
|
-
enforce_all_required(schema[:items]) if schema[:items]
|
|
186
|
-
end
|
|
187
|
-
end
|
|
188
|
-
|
|
189
|
-
# Build JSON schema properties from output fields
|
|
190
|
-
sig { params(fields: T::Hash[Symbol, T.untyped]).returns(T::Hash[String, T.untyped]) }
|
|
191
|
-
def build_properties_from_fields(fields)
|
|
192
|
-
properties = {}
|
|
193
|
-
fields.each do |field_name, descriptor|
|
|
194
|
-
properties[field_name.to_s] = DSPy::TypeSystem::SorbetJsonSchema.type_to_json_schema(descriptor.type)
|
|
195
|
-
end
|
|
196
|
-
properties
|
|
197
|
-
end
|
|
198
|
-
|
|
199
|
-
# Extract JSON from Anthropic tool use response
|
|
200
|
-
sig { params(response: DSPy::LM::Response).returns(T.nilable(String)) }
|
|
201
|
-
def extract_anthropic_tool_json(response)
|
|
202
|
-
# Check for tool calls in metadata
|
|
203
|
-
if response.metadata.respond_to?(:tool_calls) && response.metadata.tool_calls
|
|
204
|
-
tool_calls = response.metadata.tool_calls
|
|
205
|
-
if tool_calls.is_a?(Array) && !tool_calls.empty?
|
|
206
|
-
first_call = tool_calls.first
|
|
207
|
-
if first_call[:name] == "json_output" && first_call[:input]
|
|
208
|
-
return JSON.generate(first_call[:input])
|
|
209
|
-
end
|
|
210
|
-
end
|
|
211
|
-
end
|
|
212
|
-
|
|
213
|
-
nil
|
|
214
|
-
end
|
|
215
|
-
|
|
216
134
|
# Extract JSON from content that may contain markdown or plain JSON
|
|
217
135
|
sig { params(content: String).returns(String) }
|
|
218
136
|
def extract_json_from_content(content)
|
|
@@ -221,48 +139,93 @@ module DSPy
|
|
|
221
139
|
# Try 1: Check for ```json code block (with or without preceding text)
|
|
222
140
|
if content.include?('```json')
|
|
223
141
|
json_match = content.match(/```json\s*\n(.*?)\n```/m)
|
|
224
|
-
|
|
142
|
+
if json_match
|
|
143
|
+
normalized = normalize_json_candidate(json_match[1].strip)
|
|
144
|
+
return normalized if valid_json?(normalized)
|
|
145
|
+
end
|
|
225
146
|
end
|
|
226
147
|
|
|
227
148
|
# Try 2: Check for generic ``` code block
|
|
228
149
|
if content.include?('```')
|
|
229
150
|
code_match = content.match(/```\s*\n(.*?)\n```/m)
|
|
230
151
|
if code_match
|
|
231
|
-
potential_json = code_match[1].strip
|
|
232
|
-
|
|
233
|
-
begin
|
|
234
|
-
JSON.parse(potential_json)
|
|
235
|
-
return potential_json
|
|
236
|
-
rescue JSON::ParserError
|
|
237
|
-
# Not valid JSON, continue
|
|
238
|
-
end
|
|
152
|
+
potential_json = normalize_json_candidate(code_match[1].strip)
|
|
153
|
+
return potential_json if valid_json?(potential_json)
|
|
239
154
|
end
|
|
240
155
|
end
|
|
241
156
|
|
|
242
157
|
# Try 3: Try parsing entire content as JSON
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
return content
|
|
246
|
-
rescue JSON::ParserError
|
|
247
|
-
# Not pure JSON, try extracting
|
|
248
|
-
end
|
|
158
|
+
normalized_content = normalize_json_candidate(content)
|
|
159
|
+
return normalized_content if valid_json?(normalized_content)
|
|
249
160
|
|
|
250
161
|
# Try 4: Look for JSON object pattern in text (greedy match for nested objects)
|
|
251
162
|
json_pattern = /\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\}/m
|
|
252
163
|
json_match = content.match(json_pattern)
|
|
253
164
|
if json_match
|
|
254
|
-
potential_json = json_match[0]
|
|
255
|
-
|
|
256
|
-
JSON.parse(potential_json)
|
|
257
|
-
return potential_json
|
|
258
|
-
rescue JSON::ParserError
|
|
259
|
-
# Not valid JSON
|
|
260
|
-
end
|
|
165
|
+
potential_json = normalize_json_candidate(json_match[0])
|
|
166
|
+
return potential_json if valid_json?(potential_json)
|
|
261
167
|
end
|
|
262
168
|
|
|
263
169
|
# Return content as-is if no JSON found
|
|
264
170
|
content
|
|
265
171
|
end
|
|
172
|
+
|
|
173
|
+
sig { params(content: String).returns(String) }
|
|
174
|
+
# Repairs common LLM formatting slips in a JSON candidate before it is
# validated: trailing object commas are removed first, then raw control
# characters inside string literals are escaped.
#
# @param content [String] text believed to contain JSON
# @return [String] the repaired candidate
def normalize_json_candidate(content)
  without_trailing_commas = remove_trailing_object_commas(content)
  escape_control_characters_in_strings(without_trailing_commas)
end
|
|
177
|
+
|
|
178
|
+
sig { params(content: String).returns(String) }
|
|
179
|
+
# Removes every comma that directly precedes a closing brace (ignoring
# whitespace in between), e.g. `{"a": {"b": 1,},}` becomes `{"a": {"b": 1}}`.
#
# The text is scanned character by character while tracking whether the
# cursor is inside a double-quoted string, so a `,}` sequence that is part of
# a string value is left alone. Commas before `]` are not touched.
#
# @param content [String] JSON-like text
# @return [String] a new string with trailing object commas removed
def remove_trailing_object_commas(content)
  cleaned = +""
  in_string = false
  escaping = false
  pending = nil # a comma plus following whitespace, held until we see what comes next

  content.each_char do |char|
    if in_string
      cleaned << char
      if escaping
        escaping = false
      elsif char == '\\'
        escaping = true
      elsif char == '"'
        in_string = false
      end
      next
    end

    if pending
      if char.match?(/\s/)
        pending << char
        next
      end

      # Drop only the comma (keep the whitespace) when an object closes next.
      cleaned << (char == '}' ? pending[1..-1] : pending)
      pending = nil
    end

    if char == ','
      pending = +","
    else
      cleaned << char
      in_string = true if char == '"'
    end
  end

  cleaned << pending if pending
  cleaned
end
|
|
182
|
+
|
|
183
|
+
sig { params(content: String).returns(T::Boolean) }
|
|
184
|
+
# Reports whether the given text parses as JSON.
#
# Only JSON::ParserError is treated as "not JSON"; any other exception raised
# by the parser propagates to the caller.
#
# @param content [String] candidate JSON text
# @return [Boolean]
def valid_json?(content)
  begin
    JSON.parse(content)
  rescue JSON::ParserError
    return false
  end

  true
end
|
|
190
|
+
|
|
191
|
+
sig { params(content: String).returns(String) }
|
|
192
|
+
# Makes raw control characters inside JSON string literals parseable.
#
# Within double-quoted strings, literal newline, carriage return and tab are
# rewritten as the two-character escapes \n, \r and \t; any other character
# below 0x20 is dropped. Characters outside strings, and characters following
# a backslash inside a string, are copied through untouched.
#
# @param content [String] JSON-like text
# @return [String] a new string with in-string control characters escaped
def escape_control_characters_in_strings(content)
  replacements = { "\n" => '\n', "\r" => '\r', "\t" => '\t' }
  inside_string = false
  after_backslash = false

  content.each_char.with_object(+"") do |ch, out|
    unless inside_string
      out << ch
      inside_string = (ch == '"')
      next
    end

    if after_backslash
      # Whatever follows a backslash belongs to the escape sequence.
      out << ch
      after_backslash = false
    elsif ch == '\\'
      out << ch
      after_backslash = true
    elsif ch == '"'
      out << ch
      inside_string = false
    elsif replacements.key?(ch)
      out << replacements[ch]
    elsif ch.ord >= 0x20
      out << ch
    end
  end
end
|
|
266
229
|
end
|
|
267
230
|
end
|
|
268
231
|
end
|