dspy 0.15.7 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +40 -40
- data/lib/dspy/image.rb +196 -0
- data/lib/dspy/lm/adapter.rb +19 -4
- data/lib/dspy/lm/adapters/anthropic_adapter.rb +54 -3
- data/lib/dspy/lm/adapters/openai_adapter.rb +61 -2
- data/lib/dspy/lm/errors.rb +7 -0
- data/lib/dspy/lm/message.rb +68 -4
- data/lib/dspy/lm/message_builder.rb +28 -0
- data/lib/dspy/lm/vision_models.rb +58 -0
- data/lib/dspy/version.rb +1 -1
- data/lib/dspy.rb +1 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4ad35c319e21d14ee6ac3d27a6dece4b3eb1c9818ef7314d028b937806e6db4e
|
4
|
+
data.tar.gz: 3e782adb5c7daa973dfeb3d0adf26235190ff6d76dac0bf133868ea3d515fdea
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b5ea3a0696fa843326ad959757051b3067f7f18715d4f8be2be68f7412dd41c34ce3bd7e9c541fe1d7637a3a1a36b2056152b5131147a3652b505d2f2f6fb023
|
7
|
+
data.tar.gz: 0f59b08312c2d562619154c7de42958dd01830e44b0139dfd9352256a84085098e42026ec319468c85d0942fd998bbb73773e714b96b58e242751d426afbb647
|
data/README.md
CHANGED
@@ -11,6 +11,46 @@ Traditional prompting is like writing code with string concatenation: it works u
|
|
11
11
|
|
12
12
|
The result? LLM applications that actually scale and don't break when you sneeze.
|
13
13
|
|
14
|
+
## Your First DSPy Program
|
15
|
+
|
16
|
+
```ruby
|
17
|
+
# Define a signature for sentiment classification
|
18
|
+
class Classify < DSPy::Signature
|
19
|
+
description "Classify sentiment of a given sentence."
|
20
|
+
|
21
|
+
class Sentiment < T::Enum
|
22
|
+
enums do
|
23
|
+
Positive = new('positive')
|
24
|
+
Negative = new('negative')
|
25
|
+
Neutral = new('neutral')
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
input do
|
30
|
+
const :sentence, String
|
31
|
+
end
|
32
|
+
|
33
|
+
output do
|
34
|
+
const :sentiment, Sentiment
|
35
|
+
const :confidence, Float
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
# Configure DSPy with your LLM
|
40
|
+
DSPy.configure do |c|
|
41
|
+
c.lm = DSPy::LM.new('openai/gpt-4o-mini',
|
42
|
+
api_key: ENV['OPENAI_API_KEY'],
|
43
|
+
structured_outputs: true) # Enable OpenAI's native JSON mode
|
44
|
+
end
|
45
|
+
|
46
|
+
# Create the predictor and run inference
|
47
|
+
classify = DSPy::Predict.new(Classify)
|
48
|
+
result = classify.call(sentence: "This book was super fun to read!")
|
49
|
+
|
50
|
+
puts result.sentiment # => #<Sentiment::Positive>
|
51
|
+
puts result.confidence # => 0.85
|
52
|
+
```
|
53
|
+
|
14
54
|
## What You Get
|
15
55
|
|
16
56
|
**Core Building Blocks:**
|
@@ -95,46 +135,6 @@ sudo apt-get install cmake
|
|
95
135
|
|
96
136
|
**Note**: The `polars-df` gem compilation can take 15-20 minutes. Pre-built binaries are available for most platforms, so compilation is only needed if a pre-built binary isn't available for your system.
|
97
137
|
|
98
|
-
### Your First DSPy Program
|
99
|
-
|
100
|
-
```ruby
|
101
|
-
# Define a signature for sentiment classification
|
102
|
-
class Classify < DSPy::Signature
|
103
|
-
description "Classify sentiment of a given sentence."
|
104
|
-
|
105
|
-
class Sentiment < T::Enum
|
106
|
-
enums do
|
107
|
-
Positive = new('positive')
|
108
|
-
Negative = new('negative')
|
109
|
-
Neutral = new('neutral')
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
input do
|
114
|
-
const :sentence, String
|
115
|
-
end
|
116
|
-
|
117
|
-
output do
|
118
|
-
const :sentiment, Sentiment
|
119
|
-
const :confidence, Float
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
# Configure DSPy with your LLM
|
124
|
-
DSPy.configure do |c|
|
125
|
-
c.lm = DSPy::LM.new('openai/gpt-4o-mini',
|
126
|
-
api_key: ENV['OPENAI_API_KEY'],
|
127
|
-
structured_outputs: true) # Enable OpenAI's native JSON mode
|
128
|
-
end
|
129
|
-
|
130
|
-
# Create the predictor and run inference
|
131
|
-
classify = DSPy::Predict.new(Classify)
|
132
|
-
result = classify.call(sentence: "This book was super fun to read!")
|
133
|
-
|
134
|
-
puts result.sentiment # => #<Sentiment::Positive>
|
135
|
-
puts result.confidence # => 0.85
|
136
|
-
```
|
137
|
-
|
138
138
|
## Documentation
|
139
139
|
|
140
140
|
📖 **[Complete Documentation Website](https://vicentereig.github.io/dspy.rb/)**
|
data/lib/dspy/image.rb
ADDED
@@ -0,0 +1,196 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'base64'
|
4
|
+
require 'uri'
|
5
|
+
|
6
|
+
module DSPy
|
7
|
+
class Image
|
8
|
+
attr_reader :url, :base64, :data, :content_type, :detail
|
9
|
+
|
10
|
+
SUPPORTED_FORMATS = %w[image/jpeg image/png image/gif image/webp].freeze
|
11
|
+
MAX_SIZE_BYTES = 5 * 1024 * 1024 # 5MB limit
|
12
|
+
|
13
|
+
# Provider capability registry
|
14
|
+
PROVIDER_CAPABILITIES = {
|
15
|
+
'openai' => {
|
16
|
+
sources: %w[url base64 data],
|
17
|
+
parameters: %w[detail]
|
18
|
+
},
|
19
|
+
'anthropic' => {
|
20
|
+
sources: %w[base64 data],
|
21
|
+
parameters: []
|
22
|
+
}
|
23
|
+
}.freeze
|
24
|
+
|
25
|
+
def initialize(url: nil, base64: nil, data: nil, content_type: nil, detail: nil)
|
26
|
+
@detail = detail # OpenAI detail level: 'low', 'high', or 'auto'
|
27
|
+
|
28
|
+
# Validate input
|
29
|
+
validate_input!(url, base64, data)
|
30
|
+
|
31
|
+
if url
|
32
|
+
@url = url
|
33
|
+
@content_type = content_type || infer_content_type_from_url(url)
|
34
|
+
elsif base64
|
35
|
+
raise ArgumentError, "content_type is required when using base64" unless content_type
|
36
|
+
@base64 = base64
|
37
|
+
@content_type = content_type
|
38
|
+
validate_size!(Base64.decode64(base64).bytesize)
|
39
|
+
elsif data
|
40
|
+
raise ArgumentError, "content_type is required when using data" unless content_type
|
41
|
+
@data = data
|
42
|
+
@content_type = content_type
|
43
|
+
validate_size!(data.size)
|
44
|
+
end
|
45
|
+
|
46
|
+
validate_content_type!
|
47
|
+
end
|
48
|
+
|
49
|
+
def to_openai_format
|
50
|
+
if url
|
51
|
+
format = {
|
52
|
+
type: 'image_url',
|
53
|
+
image_url: {
|
54
|
+
url: url
|
55
|
+
}
|
56
|
+
}
|
57
|
+
format[:image_url][:detail] = detail if detail
|
58
|
+
format
|
59
|
+
elsif base64
|
60
|
+
{
|
61
|
+
type: 'image_url',
|
62
|
+
image_url: {
|
63
|
+
url: "data:#{content_type};base64,#{base64}"
|
64
|
+
}
|
65
|
+
}
|
66
|
+
elsif data
|
67
|
+
{
|
68
|
+
type: 'image_url',
|
69
|
+
image_url: {
|
70
|
+
url: "data:#{content_type};base64,#{to_base64}"
|
71
|
+
}
|
72
|
+
}
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def to_anthropic_format
|
77
|
+
if url
|
78
|
+
# Anthropic requires base64, so we'd need to fetch the URL
|
79
|
+
# For now, we'll raise an error or skip
|
80
|
+
raise NotImplementedError, "URL fetching for Anthropic not yet implemented"
|
81
|
+
elsif base64
|
82
|
+
{
|
83
|
+
type: 'image',
|
84
|
+
source: {
|
85
|
+
type: 'base64',
|
86
|
+
media_type: content_type,
|
87
|
+
data: base64
|
88
|
+
}
|
89
|
+
}
|
90
|
+
elsif data
|
91
|
+
{
|
92
|
+
type: 'image',
|
93
|
+
source: {
|
94
|
+
type: 'base64',
|
95
|
+
media_type: content_type,
|
96
|
+
data: to_base64
|
97
|
+
}
|
98
|
+
}
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
def to_base64
|
103
|
+
return base64 if base64
|
104
|
+
return Base64.strict_encode64(data.pack('C*')) if data
|
105
|
+
nil
|
106
|
+
end
|
107
|
+
|
108
|
+
def validate!
|
109
|
+
validate_content_type!
|
110
|
+
|
111
|
+
if base64
|
112
|
+
validate_size!(Base64.decode64(base64).bytesize)
|
113
|
+
elsif data
|
114
|
+
validate_size!(data.size)
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
def validate_for_provider!(provider)
|
119
|
+
capabilities = PROVIDER_CAPABILITIES[provider]
|
120
|
+
|
121
|
+
unless capabilities
|
122
|
+
raise DSPy::LM::IncompatibleImageFeatureError,
|
123
|
+
"Unknown provider '#{provider}'. Supported providers: #{PROVIDER_CAPABILITIES.keys.join(', ')}"
|
124
|
+
end
|
125
|
+
|
126
|
+
# Check source compatibility
|
127
|
+
current_source = if url
|
128
|
+
'url'
|
129
|
+
elsif base64
|
130
|
+
'base64'
|
131
|
+
elsif data
|
132
|
+
'data'
|
133
|
+
end
|
134
|
+
|
135
|
+
unless capabilities[:sources].include?(current_source)
|
136
|
+
case provider
|
137
|
+
when 'anthropic'
|
138
|
+
if current_source == 'url'
|
139
|
+
raise DSPy::LM::IncompatibleImageFeatureError,
|
140
|
+
"Anthropic doesn't support image URLs. Please provide base64 or raw data instead."
|
141
|
+
end
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
# Check parameter compatibility
|
146
|
+
if detail && !capabilities[:parameters].include?('detail')
|
147
|
+
case provider
|
148
|
+
when 'anthropic'
|
149
|
+
raise DSPy::LM::IncompatibleImageFeatureError,
|
150
|
+
"Anthropic doesn't support the 'detail' parameter. This feature is OpenAI-specific."
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
private
|
156
|
+
|
157
|
+
def validate_input!(url, base64, data)
|
158
|
+
inputs = [url, base64, data].compact
|
159
|
+
|
160
|
+
if inputs.empty?
|
161
|
+
raise ArgumentError, "Must provide either url, base64, or data"
|
162
|
+
elsif inputs.size > 1
|
163
|
+
raise ArgumentError, "Only one of url, base64, or data can be provided"
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
def validate_content_type!
|
168
|
+
unless SUPPORTED_FORMATS.include?(content_type)
|
169
|
+
raise ArgumentError, "Unsupported image format: #{content_type}. Supported formats: #{SUPPORTED_FORMATS.join(', ')}"
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
def validate_size!(size_bytes)
|
174
|
+
if size_bytes > MAX_SIZE_BYTES
|
175
|
+
raise ArgumentError, "Image size exceeds 5MB limit (got #{size_bytes} bytes)"
|
176
|
+
end
|
177
|
+
end
|
178
|
+
|
179
|
+
def infer_content_type_from_url(url)
|
180
|
+
extension = File.extname(URI.parse(url).path).downcase
|
181
|
+
|
182
|
+
case extension
|
183
|
+
when '.jpg', '.jpeg'
|
184
|
+
'image/jpeg'
|
185
|
+
when '.png'
|
186
|
+
'image/png'
|
187
|
+
when '.gif'
|
188
|
+
'image/gif'
|
189
|
+
when '.webp'
|
190
|
+
'image/webp'
|
191
|
+
else
|
192
|
+
'image/jpeg' # Default fallback
|
193
|
+
end
|
194
|
+
end
|
195
|
+
end
|
196
|
+
end
|
data/lib/dspy/lm/adapter.rb
CHANGED
@@ -36,10 +36,25 @@ module DSPy
|
|
36
36
|
# Helper method to normalize message format
|
37
37
|
def normalize_messages(messages)
|
38
38
|
messages.map do |msg|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
39
|
+
# Support both Message objects and hash format
|
40
|
+
if msg.is_a?(DSPy::LM::Message)
|
41
|
+
msg.to_h
|
42
|
+
else
|
43
|
+
content = msg[:content]
|
44
|
+
# Don't convert array content to string
|
45
|
+
{
|
46
|
+
role: msg[:role].to_s,
|
47
|
+
content: content.is_a?(Array) ? content : content.to_s
|
48
|
+
}
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
# Check if messages contain images
|
54
|
+
def contains_images?(messages)
|
55
|
+
messages.any? do |msg|
|
56
|
+
content = msg[:content] || msg.content
|
57
|
+
content.is_a?(Array) && content.any? { |item| item[:type] == 'image' }
|
43
58
|
end
|
44
59
|
end
|
45
60
|
end
|
data/lib/dspy/lm/adapters/anthropic_adapter.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'anthropic'
|
4
|
+
require_relative '../vision_models'
|
4
5
|
|
5
6
|
module DSPy
|
6
7
|
class LM
|
@@ -12,14 +13,23 @@ module DSPy
|
|
12
13
|
end
|
13
14
|
|
14
15
|
def chat(messages:, signature: nil, **extra_params, &block)
|
16
|
+
normalized_messages = normalize_messages(messages)
|
17
|
+
|
18
|
+
# Validate vision support if images are present
|
19
|
+
if contains_images?(normalized_messages)
|
20
|
+
VisionModels.validate_vision_support!('anthropic', model)
|
21
|
+
# Convert messages to Anthropic format with proper image handling
|
22
|
+
normalized_messages = format_multimodal_messages(normalized_messages)
|
23
|
+
end
|
24
|
+
|
15
25
|
# Anthropic requires system message to be separate from messages
|
16
|
-
system_message, user_messages = extract_system_message(
|
26
|
+
system_message, user_messages = extract_system_message(normalized_messages)
|
17
27
|
|
18
28
|
# Check if this is a tool use request
|
19
29
|
has_tools = extra_params.key?(:tools) && !extra_params[:tools].empty?
|
20
30
|
|
21
31
|
# Apply JSON prefilling if needed for better Claude JSON compliance (but not for tool use)
|
22
|
-
unless has_tools
|
32
|
+
unless has_tools || contains_images?(normalized_messages)
|
23
33
|
user_messages = prepare_messages_for_json(user_messages, system_message)
|
24
34
|
end
|
25
35
|
|
@@ -111,7 +121,21 @@ module DSPy
|
|
111
121
|
)
|
112
122
|
end
|
113
123
|
rescue => e
|
114
|
-
|
124
|
+
# Check for specific image-related errors in the message
|
125
|
+
error_msg = e.message.to_s
|
126
|
+
|
127
|
+
if error_msg.include?('Could not process image')
|
128
|
+
raise AdapterError, "Image processing failed: #{error_msg}. Ensure your image is a valid PNG, JPEG, GIF, or WebP format, properly base64-encoded, and under 5MB."
|
129
|
+
elsif error_msg.include?('image')
|
130
|
+
raise AdapterError, "Image error: #{error_msg}. Anthropic requires base64-encoded images (URLs are not supported)."
|
131
|
+
elsif error_msg.include?('rate')
|
132
|
+
raise AdapterError, "Anthropic rate limit exceeded: #{error_msg}. Please wait and try again."
|
133
|
+
elsif error_msg.include?('authentication') || error_msg.include?('API key')
|
134
|
+
raise AdapterError, "Anthropic authentication failed: #{error_msg}. Check your API key."
|
135
|
+
else
|
136
|
+
# Generic error handling
|
137
|
+
raise AdapterError, "Anthropic adapter error: #{e.message}"
|
138
|
+
end
|
115
139
|
end
|
116
140
|
end
|
117
141
|
|
@@ -234,6 +258,33 @@ module DSPy
|
|
234
258
|
|
235
259
|
[system_message, user_messages]
|
236
260
|
end
|
261
|
+
|
262
|
+
def format_multimodal_messages(messages)
|
263
|
+
messages.map do |msg|
|
264
|
+
if msg[:content].is_a?(Array)
|
265
|
+
# Convert multimodal content to Anthropic format
|
266
|
+
formatted_content = msg[:content].map do |item|
|
267
|
+
case item[:type]
|
268
|
+
when 'text'
|
269
|
+
{ type: 'text', text: item[:text] }
|
270
|
+
when 'image'
|
271
|
+
# Validate image compatibility before formatting
|
272
|
+
item[:image].validate_for_provider!('anthropic')
|
273
|
+
item[:image].to_anthropic_format
|
274
|
+
else
|
275
|
+
item
|
276
|
+
end
|
277
|
+
end
|
278
|
+
|
279
|
+
{
|
280
|
+
role: msg[:role],
|
281
|
+
content: formatted_content
|
282
|
+
}
|
283
|
+
else
|
284
|
+
msg
|
285
|
+
end
|
286
|
+
end
|
287
|
+
end
|
237
288
|
end
|
238
289
|
end
|
239
290
|
end
|
data/lib/dspy/lm/adapters/openai_adapter.rb
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
require 'openai'
|
4
4
|
require_relative 'openai/schema_converter'
|
5
|
+
require_relative '../vision_models'
|
5
6
|
|
6
7
|
module DSPy
|
7
8
|
class LM
|
@@ -14,9 +15,18 @@ module DSPy
|
|
14
15
|
end
|
15
16
|
|
16
17
|
def chat(messages:, signature: nil, response_format: nil, &block)
|
18
|
+
normalized_messages = normalize_messages(messages)
|
19
|
+
|
20
|
+
# Validate vision support if images are present
|
21
|
+
if contains_images?(normalized_messages)
|
22
|
+
VisionModels.validate_vision_support!('openai', model)
|
23
|
+
# Convert messages to OpenAI format with proper image handling
|
24
|
+
normalized_messages = format_multimodal_messages(normalized_messages)
|
25
|
+
end
|
26
|
+
|
17
27
|
request_params = {
|
18
28
|
model: model,
|
19
|
-
messages:
|
29
|
+
messages: normalized_messages,
|
20
30
|
temperature: 0.0 # DSPy default for deterministic responses
|
21
31
|
}
|
22
32
|
|
@@ -72,7 +82,29 @@ module DSPy
|
|
72
82
|
metadata: metadata
|
73
83
|
)
|
74
84
|
rescue => e
|
75
|
-
|
85
|
+
# Check for specific error types and messages
|
86
|
+
error_msg = e.message.to_s
|
87
|
+
|
88
|
+
# Try to parse error body if it looks like JSON
|
89
|
+
error_body = if error_msg.start_with?('{')
|
90
|
+
JSON.parse(error_msg) rescue nil
|
91
|
+
elsif e.respond_to?(:response) && e.response
|
92
|
+
e.response[:body] rescue nil
|
93
|
+
end
|
94
|
+
|
95
|
+
# Check for specific image-related errors
|
96
|
+
if error_msg.include?('image_parse_error') || error_msg.include?('unsupported image')
|
97
|
+
raise AdapterError, "Image processing failed: #{error_msg}. Ensure your image is a valid PNG, JPEG, GIF, or WebP format and under 5MB."
|
98
|
+
elsif error_msg.include?('rate') && error_msg.include?('limit')
|
99
|
+
raise AdapterError, "OpenAI rate limit exceeded: #{error_msg}. Please wait and try again."
|
100
|
+
elsif error_msg.include?('authentication') || error_msg.include?('API key') || error_msg.include?('Unauthorized')
|
101
|
+
raise AdapterError, "OpenAI authentication failed: #{error_msg}. Check your API key."
|
102
|
+
elsif error_body && error_body.dig('error', 'message')
|
103
|
+
raise AdapterError, "OpenAI API error: #{error_body.dig('error', 'message')}"
|
104
|
+
else
|
105
|
+
# Generic error handling
|
106
|
+
raise AdapterError, "OpenAI adapter error: #{e.message}"
|
107
|
+
end
|
76
108
|
end
|
77
109
|
end
|
78
110
|
|
@@ -81,6 +113,33 @@ module DSPy
|
|
81
113
|
def supports_structured_outputs?
|
82
114
|
DSPy::LM::Adapters::OpenAI::SchemaConverter.supports_structured_outputs?(model)
|
83
115
|
end
|
116
|
+
|
117
|
+
def format_multimodal_messages(messages)
|
118
|
+
messages.map do |msg|
|
119
|
+
if msg[:content].is_a?(Array)
|
120
|
+
# Convert multimodal content to OpenAI format
|
121
|
+
formatted_content = msg[:content].map do |item|
|
122
|
+
case item[:type]
|
123
|
+
when 'text'
|
124
|
+
{ type: 'text', text: item[:text] }
|
125
|
+
when 'image'
|
126
|
+
# Validate image compatibility before formatting
|
127
|
+
item[:image].validate_for_provider!('openai')
|
128
|
+
item[:image].to_openai_format
|
129
|
+
else
|
130
|
+
item
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
{
|
135
|
+
role: msg[:role],
|
136
|
+
content: formatted_content
|
137
|
+
}
|
138
|
+
else
|
139
|
+
msg
|
140
|
+
end
|
141
|
+
end
|
142
|
+
end
|
84
143
|
end
|
85
144
|
end
|
86
145
|
end
|
data/lib/dspy/lm/errors.rb
CHANGED
@@ -19,5 +19,12 @@ module DSPy
|
|
19
19
|
super("API key is required but was not provided. Set it via the api_key parameter or #{env_var} environment variable.")
|
20
20
|
end
|
21
21
|
end
|
22
|
+
|
23
|
+
# Raised when image features are incompatible with the target provider
|
24
|
+
class IncompatibleImageFeatureError < AdapterError
|
25
|
+
def initialize(message)
|
26
|
+
super(message)
|
27
|
+
end
|
28
|
+
end
|
22
29
|
end
|
23
30
|
end
|
data/lib/dspy/lm/message.rb
CHANGED
@@ -18,7 +18,7 @@ module DSPy
|
|
18
18
|
end
|
19
19
|
|
20
20
|
const :role, Role
|
21
|
-
const :content, String
|
21
|
+
const :content, T.any(String, T::Array[T::Hash[Symbol, T.untyped]])
|
22
22
|
const :name, T.nilable(String), default: nil
|
23
23
|
|
24
24
|
sig { returns(T::Hash[Symbol, T.untyped]) }
|
@@ -33,7 +33,64 @@ module DSPy
|
|
33
33
|
|
34
34
|
sig { returns(String) }
|
35
35
|
def to_s
|
36
|
-
|
36
|
+
if content.is_a?(String)
|
37
|
+
name ? "#{role.serialize}(#{name}): #{content}" : "#{role.serialize}: #{content}"
|
38
|
+
else
|
39
|
+
name ? "#{role.serialize}(#{name}): [multimodal content]" : "#{role.serialize}: [multimodal content]"
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
sig { returns(T::Boolean) }
|
44
|
+
def multimodal?
|
45
|
+
content.is_a?(Array)
|
46
|
+
end
|
47
|
+
|
48
|
+
sig { returns(T::Hash[Symbol, T.untyped]) }
|
49
|
+
def to_openai_format
|
50
|
+
formatted = { role: role.serialize }
|
51
|
+
|
52
|
+
if content.is_a?(String)
|
53
|
+
formatted[:content] = content
|
54
|
+
else
|
55
|
+
# Convert multimodal content array to OpenAI format
|
56
|
+
formatted[:content] = content.map do |item|
|
57
|
+
case item[:type]
|
58
|
+
when 'text'
|
59
|
+
{ type: 'text', text: item[:text] }
|
60
|
+
when 'image'
|
61
|
+
item[:image].to_openai_format
|
62
|
+
else
|
63
|
+
item
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
formatted[:name] = name if name
|
69
|
+
formatted
|
70
|
+
end
|
71
|
+
|
72
|
+
sig { returns(T::Hash[Symbol, T.untyped]) }
|
73
|
+
def to_anthropic_format
|
74
|
+
formatted = { role: role.serialize }
|
75
|
+
|
76
|
+
if content.is_a?(String)
|
77
|
+
formatted[:content] = content
|
78
|
+
else
|
79
|
+
# Convert multimodal content array to Anthropic format
|
80
|
+
formatted[:content] = content.map do |item|
|
81
|
+
case item[:type]
|
82
|
+
when 'text'
|
83
|
+
{ type: 'text', text: item[:text] }
|
84
|
+
when 'image'
|
85
|
+
item[:image].to_anthropic_format
|
86
|
+
else
|
87
|
+
item
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
formatted[:name] = name if name
|
93
|
+
formatted
|
37
94
|
end
|
38
95
|
end
|
39
96
|
|
@@ -71,10 +128,17 @@ module DSPy
|
|
71
128
|
sig { params(data: T::Hash[Symbol, T.untyped]).returns(T.nilable(Message)) }
|
72
129
|
def self.create_from_hash(data)
|
73
130
|
role_str = data[:role]&.to_s
|
74
|
-
content = data[:content]
|
131
|
+
content = data[:content]
|
75
132
|
|
76
133
|
return nil if role_str.nil? || content.nil?
|
77
134
|
|
135
|
+
# Handle both string and array content
|
136
|
+
formatted_content = if content.is_a?(Array)
|
137
|
+
content
|
138
|
+
else
|
139
|
+
content.to_s
|
140
|
+
end
|
141
|
+
|
78
142
|
# Convert string role to enum
|
79
143
|
role = case role_str
|
80
144
|
when 'system' then Message::Role::System
|
@@ -87,7 +151,7 @@ module DSPy
|
|
87
151
|
|
88
152
|
Message.new(
|
89
153
|
role: role,
|
90
|
-
content:
|
154
|
+
content: formatted_content,
|
91
155
|
name: data[:name]&.to_s
|
92
156
|
)
|
93
157
|
rescue => e
|
data/lib/dspy/lm/message_builder.rb
CHANGED
@@ -41,6 +41,34 @@ module DSPy
|
|
41
41
|
self
|
42
42
|
end
|
43
43
|
|
44
|
+
sig { params(text: String, image: DSPy::Image).returns(MessageBuilder) }
|
45
|
+
def user_with_image(text, image)
|
46
|
+
content_array = [
|
47
|
+
{ type: 'text', text: text },
|
48
|
+
{ type: 'image', image: image }
|
49
|
+
]
|
50
|
+
|
51
|
+
@messages << Message.new(
|
52
|
+
role: Message::Role::User,
|
53
|
+
content: content_array
|
54
|
+
)
|
55
|
+
self
|
56
|
+
end
|
57
|
+
|
58
|
+
sig { params(text: String, images: T::Array[DSPy::Image]).returns(MessageBuilder) }
|
59
|
+
def user_with_images(text, images)
|
60
|
+
content_array = [{ type: 'text', text: text }]
|
61
|
+
images.each do |image|
|
62
|
+
content_array << { type: 'image', image: image }
|
63
|
+
end
|
64
|
+
|
65
|
+
@messages << Message.new(
|
66
|
+
role: Message::Role::User,
|
67
|
+
content: content_array
|
68
|
+
)
|
69
|
+
self
|
70
|
+
end
|
71
|
+
|
44
72
|
# For backward compatibility, allow conversion to hash array
|
45
73
|
sig { returns(T::Array[T::Hash[Symbol, T.untyped]]) }
|
46
74
|
def to_h
|
data/lib/dspy/lm/vision_models.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DSPy
|
4
|
+
class LM
|
5
|
+
module VisionModels
|
6
|
+
# OpenAI vision-capable models
|
7
|
+
OPENAI_VISION_MODELS = [
|
8
|
+
'gpt-4-vision-preview',
|
9
|
+
'gpt-4-turbo',
|
10
|
+
'gpt-4-turbo-2024-04-09',
|
11
|
+
'gpt-4-turbo-preview',
|
12
|
+
'gpt-4o',
|
13
|
+
'gpt-4o-2024-05-13',
|
14
|
+
'gpt-4o-2024-08-06',
|
15
|
+
'gpt-4o-mini',
|
16
|
+
'gpt-4o-mini-2024-07-18'
|
17
|
+
].freeze
|
18
|
+
|
19
|
+
# Anthropic vision-capable models
|
20
|
+
ANTHROPIC_VISION_MODELS = [
|
21
|
+
'claude-3-opus-20240229',
|
22
|
+
'claude-3-sonnet-20240229',
|
23
|
+
'claude-3-haiku-20240307',
|
24
|
+
'claude-3-5-sonnet-20241022',
|
25
|
+
'claude-3-5-sonnet-20240620',
|
26
|
+
'claude-3-5-haiku-20241022'
|
27
|
+
].freeze
|
28
|
+
|
29
|
+
def self.supports_vision?(provider, model)
|
30
|
+
case provider.to_s.downcase
|
31
|
+
when 'openai'
|
32
|
+
OPENAI_VISION_MODELS.any? { |m| model.include?(m) }
|
33
|
+
when 'anthropic'
|
34
|
+
ANTHROPIC_VISION_MODELS.any? { |m| model.include?(m) }
|
35
|
+
else
|
36
|
+
false
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def self.validate_vision_support!(provider, model)
|
41
|
+
unless supports_vision?(provider, model)
|
42
|
+
raise ArgumentError, "Model #{model} does not support vision. Vision-capable models for #{provider}: #{vision_models_for(provider).join(', ')}"
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def self.vision_models_for(provider)
|
47
|
+
case provider.to_s.downcase
|
48
|
+
when 'openai'
|
49
|
+
OPENAI_VISION_MODELS
|
50
|
+
when 'anthropic'
|
51
|
+
ANTHROPIC_VISION_MODELS
|
52
|
+
else
|
53
|
+
[]
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
data/lib/dspy/version.rb
CHANGED
data/lib/dspy.rb
CHANGED
@@ -124,6 +124,7 @@ require_relative 'dspy/few_shot_example'
|
|
124
124
|
require_relative 'dspy/prompt'
|
125
125
|
require_relative 'dspy/example'
|
126
126
|
require_relative 'dspy/lm'
|
127
|
+
require_relative 'dspy/image'
|
127
128
|
require_relative 'dspy/strategy'
|
128
129
|
require_relative 'dspy/prediction'
|
129
130
|
require_relative 'dspy/predict'
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dspy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.15.7
|
4
|
+
version: 0.16.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vicente Reig Rincón de Arellano
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-08-
|
10
|
+
date: 2025-08-08 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: dry-configurable
|
@@ -165,6 +165,7 @@ files:
|
|
165
165
|
- lib/dspy/example.rb
|
166
166
|
- lib/dspy/few_shot_example.rb
|
167
167
|
- lib/dspy/field.rb
|
168
|
+
- lib/dspy/image.rb
|
168
169
|
- lib/dspy/instrumentation.rb
|
169
170
|
- lib/dspy/instrumentation/event_payload_factory.rb
|
170
171
|
- lib/dspy/instrumentation/event_payloads.rb
|
@@ -190,6 +191,7 @@ files:
|
|
190
191
|
- lib/dspy/lm/strategy_selector.rb
|
191
192
|
- lib/dspy/lm/structured_output_strategy.rb
|
192
193
|
- lib/dspy/lm/usage.rb
|
194
|
+
- lib/dspy/lm/vision_models.rb
|
193
195
|
- lib/dspy/memory.rb
|
194
196
|
- lib/dspy/memory/embedding_engine.rb
|
195
197
|
- lib/dspy/memory/in_memory_store.rb
|