dspy 0.33.0 → 0.34.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 52dc686ff0347f7844a3b6fc476b31737f3467d5d179974f34a98b8dbbd12073
- data.tar.gz: 0e39c94a4766c481167268f49e42277d297b688ee9f960181785062e69f91572
+ metadata.gz: 100e82f4aeff8020a845aa80a63ad86278ead32f34b7846b0624db99dc060325
+ data.tar.gz: 43ed18798e67e829e2decdd8ef0519751d6f4fc2cf52571850f1acfd671f2780
  SHA512:
- metadata.gz: bb4fb2ce89ed600e971a07cfabe3eb9edd344563aa77df57304dbec565121eeb9c8a53ba4cdd66f04c81cb3b1231d222a59fb4a15962680051c07de49c080dca
- data.tar.gz: 2543dd3bc228c98a1ab82c14ce8fffbed86342fa661af674976b90b10752334a5606257401f9ff953bd82f36aa29fe816895acec9e30a5219e8c8dc2d3ea1727
+ metadata.gz: 3081d9fada92dcf1f7f5b003212fd6d5b94787b6982c6828bd95704e84f197be3017d08eff81c4c1271e4a1d442e6a235973f4c2ab69c47493e025b2d155b1ca
+ data.tar.gz: 32454139be26918608d4c63f58a706d762caa457ca3df9addb238eb9c9bfd5e97f749054923573838b6e983db129732de290387076423e39293bcf02e2747bc4
data/README.md CHANGED
@@ -3,7 +3,7 @@
  [![Gem Version](https://img.shields.io/gem/v/dspy)](https://rubygems.org/gems/dspy)
  [![Total Downloads](https://img.shields.io/gem/dt/dspy)](https://rubygems.org/gems/dspy)
  [![Build Status](https://img.shields.io/github/actions/workflow/status/vicentereig/dspy.rb/ruby.yml?branch=main&label=build)](https://github.com/vicentereig/dspy.rb/actions/workflows/ruby.yml)
- [![Documentation](https://img.shields.io/badge/docs-vicentereig.github.io%2Fdspy.rb-blue)](https://vicentereig.github.io/dspy.rb/)
+ [![Documentation](https://img.shields.io/badge/docs-oss.vicente.services%2Fdspy.rb-blue)](https://oss.vicente.services/dspy.rb/)
  [![Discord](https://img.shields.io/discord/1161519468141355160?label=discord&logo=discord&logoColor=white)](https://discord.gg/zWBhrMqn)

  > [!NOTE]
@@ -248,13 +248,24 @@ DSPy.rb has gone from experimental to production-ready in three fast releases.

  ## Documentation

- 📖 **[Complete Documentation Website](https://vicentereig.github.io/dspy.rb/)**
+ 📖 **[Complete Documentation Website](https://oss.vicente.services/dspy.rb/)**

  ### LLM-Friendly Documentation

  For LLMs and AI assistants working with DSPy.rb:
- - **[llms.txt](https://vicentereig.github.io/dspy.rb/llms.txt)** - Concise reference optimized for LLMs
- - **[llms-full.txt](https://vicentereig.github.io/dspy.rb/llms-full.txt)** - Comprehensive API documentation
+ - **[llms.txt](https://oss.vicente.services/dspy.rb/llms.txt)** - Concise reference optimized for LLMs
+ - **[llms-full.txt](https://oss.vicente.services/dspy.rb/llms-full.txt)** - Comprehensive API documentation
+
+ ### Claude Skill
+
+ A [Claude Skill](https://github.com/vicentereig/dspy-rb-skill) is available to help you build DSPy.rb applications with Claude Code or claude.ai.
+
+ **Claude Code:**
+ ```bash
+ git clone https://github.com/vicentereig/dspy-rb-skill ~/.claude/skills/dspy-rb
+ ```
+
+ **Claude.ai (Pro/Max):** Download the [skill as a ZIP](https://github.com/vicentereig/dspy-rb-skill/archive/refs/heads/main.zip) and upload via Settings > Skills.

  ### Getting Started
  - **[Installation & Setup](docs/src/getting-started/installation.md)** - Detailed installation and configuration
data/lib/dspy/evals.rb CHANGED
@@ -191,6 +191,12 @@ module DSPy
  sig { returns(T.nilable(BatchEvaluationResult)) }
  attr_reader :last_batch_result

+ sig { returns(T::Boolean) }
+ attr_reader :export_scores
+
+ sig { returns(String) }
+ attr_reader :score_name
+
  include DSPy::Callbacks

  create_before_callback :call, wrap: false
@@ -227,16 +233,20 @@ module DSPy
  num_threads: T.nilable(Integer),
  max_errors: T.nilable(Integer),
  failure_score: T.nilable(Numeric),
- provide_traceback: T::Boolean
+ provide_traceback: T::Boolean,
+ export_scores: T::Boolean,
+ score_name: String
  ).void
  end
- def initialize(program, metric: nil, num_threads: 1, max_errors: 5, failure_score: 0.0, provide_traceback: true)
+ def initialize(program, metric: nil, num_threads: 1, max_errors: 5, failure_score: 0.0, provide_traceback: true, export_scores: false, score_name: 'evaluation')
  @program = program
  @metric = metric
  @num_threads = num_threads || 1
  @max_errors = max_errors || 5
  @provide_traceback = provide_traceback
  @failure_score = failure_score ? failure_score.to_f : 0.0
+ @export_scores = export_scores
+ @score_name = score_name
  @last_example_result = nil
  @last_batch_result = nil
  end
@@ -665,6 +675,11 @@ module DSPy
  score: result.metrics[:score],
  error: result.metrics[:error]
  })
+
+ # Export score to Langfuse if enabled
+ if @export_scores
+ export_example_score(example, result)
+ end
  rescue => e
  DSPy.log('evals.example.observation_error', error: e.message)
  end
@@ -678,10 +693,38 @@ module DSPy
  pass_rate: batch_result.pass_rate,
  score: batch_result.score
  })
+
+ # Export batch score to Langfuse if enabled
+ if @export_scores
+ export_batch_score(batch_result)
+ end
  rescue => e
  DSPy.log('evals.batch.observation_error', error: e.message)
  end

+ def export_example_score(example, result)
+ score_value = result.metrics[:score] || (result.passed ? 1.0 : 0.0)
+ example_id = extract_example_id(example)
+
+ DSPy.score(
+ @score_name,
+ score_value,
+ comment: "Example: #{example_id || 'unknown'}, passed: #{result.passed}"
+ )
+ rescue => e
+ DSPy.log('evals.score_export_error', error: e.message)
+ end
+
+ def export_batch_score(batch_result)
+ DSPy.score(
+ "#{@score_name}_batch",
+ batch_result.pass_rate,
+ comment: "Batch: #{batch_result.passed_examples}/#{batch_result.total_examples} passed"
+ )
+ rescue => e
+ DSPy.log('evals.batch_score_export_error', error: e.message)
+ end
+
  def extract_example_id(example)
  if example.respond_to?(:id)
  example.id
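
With these changes, an evaluation run can push its per-example scores and its batch pass rate to Langfuse through the new `DSPy.score` helper. A minimal usage sketch follows; `Evaluator` stands in for the evaluator class defined in `data/lib/dspy/evals.rb` (its exact constant is not visible in this hunk), and `my_program` and the metric lambda are placeholders — only the `export_scores:` and `score_name:` keywords come from this diff.

```ruby
# Hedged sketch: enabling Langfuse score export on an evaluation run.
metric = ->(example, prediction) { example.expected == prediction.answer }  # placeholder metric

evaluator = Evaluator.new(
  my_program,                 # any DSPy program (placeholder)
  metric: metric,
  export_scores: true,        # per-example scores are sent via DSPy.score(score_name, ...)
  score_name: 'qa_accuracy'   # the batch pass rate is exported as 'qa_accuracy_batch'
)
```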
data/lib/dspy/ext/struct_descriptions.rb ADDED
@@ -0,0 +1,58 @@
+ # frozen_string_literal: true
+
+ require 'sorbet-runtime'
+
+ module DSPy
+ module Ext
+ # Extends T::Struct to support field descriptions via the :description kwarg.
+ #
+ # This module is prepended to T::Struct to intercept const/prop definitions
+ # and capture descriptions before they reach Sorbet (which doesn't support them).
+ #
+ # @example
+ # class ASTNode < T::Struct
+ # const :node_type, String, description: 'The type of AST node'
+ # const :text, String, default: "", description: 'Text content of the node'
+ # const :children, T::Array[ASTNode], default: []
+ # end
+ #
+ # ASTNode.field_descriptions[:node_type] # => "The type of AST node"
+ # ASTNode.field_descriptions[:text] # => "Text content of the node"
+ # ASTNode.field_descriptions[:children] # => nil (no description)
+ #
+ module StructDescriptions
+ def self.prepended(base)
+ base.singleton_class.prepend(ClassMethods)
+ end
+
+ module ClassMethods
+ # Returns a hash of field names to their descriptions.
+ # Only fields with explicit :description kwargs are included.
+ #
+ # @return [Hash{Symbol => String}]
+ def field_descriptions
+ @field_descriptions ||= {}
+ end
+
+ # Intercepts const definitions to capture :description before Sorbet sees it.
+ def const(name, type, **kwargs)
+ if kwargs.key?(:description)
+ field_descriptions[name] = kwargs.delete(:description)
+ end
+ super(name, type, **kwargs)
+ end
+
+ # Intercepts prop definitions to capture :description before Sorbet sees it.
+ def prop(name, type, **kwargs)
+ if kwargs.key?(:description)
+ field_descriptions[name] = kwargs.delete(:description)
+ end
+ super(name, type, **kwargs)
+ end
+ end
+ end
+ end
+ end
+
+ # Apply the extension to T::Struct globally
+ T::Struct.prepend(DSPy::Ext::StructDescriptions)
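
The descriptions captured here are consumed by the JSON schema changes further down in this diff. A small sketch of the round trip, assuming a throwaway struct (the struct and its values are illustrative; `field_descriptions` and `generate_struct_schema` are the APIs shown in this diff):

```ruby
class Citation < T::Struct
  const :title, String, description: 'Title of the cited work'
  const :year,  Integer
end

Citation.field_descriptions
# => { title: 'Title of the cited work' }

# The schema generator picks the description up and attaches it to the property.
schema = DSPy::TypeSystem::SorbetJsonSchema.generate_struct_schema(Citation)
schema[:properties][:title][:description]
# => 'Title of the cited work'
```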
@@ -12,10 +12,33 @@ module DSPy
  extend T::Sig
  extend T::Helpers

+ # Result type that includes both schema and any accumulated definitions
+ class SchemaResult < T::Struct
+ const :schema, T::Hash[Symbol, T.untyped]
+ const :definitions, T::Hash[String, T::Hash[Symbol, T.untyped]], default: {}
+ end
+
+ # Convert a Sorbet type to JSON Schema format with definitions tracking
+ # Returns a SchemaResult with the schema and any $defs needed
+ sig { params(type: T.untyped, visited: T.nilable(T::Set[T.untyped]), definitions: T.nilable(T::Hash[String, T::Hash[Symbol, T.untyped]])).returns(SchemaResult) }
+ def self.type_to_json_schema_with_defs(type, visited = nil, definitions = nil)
+ visited ||= Set.new
+ definitions ||= {}
+ schema = type_to_json_schema_internal(type, visited, definitions)
+ SchemaResult.new(schema: schema, definitions: definitions)
+ end
+
  # Convert a Sorbet type to JSON Schema format
+ # For backward compatibility, this method returns just the schema hash
  sig { params(type: T.untyped, visited: T.nilable(T::Set[T.untyped])).returns(T::Hash[Symbol, T.untyped]) }
  def self.type_to_json_schema(type, visited = nil)
  visited ||= Set.new
+ type_to_json_schema_internal(type, visited, {})
+ end
+
+ # Internal implementation that tracks definitions
+ sig { params(type: T.untyped, visited: T::Set[T.untyped], definitions: T::Hash[String, T::Hash[Symbol, T.untyped]]).returns(T::Hash[Symbol, T.untyped]) }
+ def self.type_to_json_schema_internal(type, visited, definitions)

  # Handle T::Boolean type alias first
  if type == T::Boolean
@@ -24,7 +47,7 @@ module DSPy

  # Handle type aliases by resolving to their underlying type
  if type.is_a?(T::Private::Types::TypeAlias)
- return self.type_to_json_schema(type.aliased_type, visited)
+ return type_to_json_schema_internal(type.aliased_type, visited, definitions)
  end

  # Handle raw class types first
@@ -54,12 +77,13 @@ module DSPy
  # Check for recursion
  if visited.include?(type)
  # Return a reference to avoid infinite recursion
+ # Use #/$defs/ format for OpenAI/Gemini compatibility
+ simple_name = type.name.split('::').last
  {
- "$ref" => "#/definitions/#{type.name.split('::').last}",
- description: "Recursive reference to #{type.name}"
+ "$ref" => "#/$defs/#{simple_name}"
  }
  else
- self.generate_struct_schema(type, visited)
+ generate_struct_schema_internal(type, visited, definitions)
  end
  else
  { type: "string" } # Default fallback
@@ -93,12 +117,13 @@ module DSPy
  elsif type.raw_type < T::Struct
  # Handle custom T::Struct classes
  if visited.include?(type.raw_type)
+ # Use #/$defs/ format for OpenAI/Gemini compatibility
+ simple_name = type.raw_type.name.split('::').last
  {
- "$ref" => "#/definitions/#{type.raw_type.name.split('::').last}",
- description: "Recursive reference to #{type.raw_type.name}"
+ "$ref" => "#/$defs/#{simple_name}"
  }
  else
- generate_struct_schema(type.raw_type, visited)
+ generate_struct_schema_internal(type.raw_type, visited, definitions)
  end
  else
  { type: "string" } # Default fallback
@@ -108,13 +133,13 @@ module DSPy
  # Handle arrays properly with nested item type
  {
  type: "array",
- items: self.type_to_json_schema(type.type, visited)
+ items: type_to_json_schema_internal(type.type, visited, definitions)
  }
  elsif type.is_a?(T::Types::TypedHash)
  # Handle hashes as objects with additionalProperties
  # TypedHash has keys and values methods to access its key and value types
  # Note: propertyNames is NOT supported by OpenAI structured outputs, so we omit it
- value_schema = self.type_to_json_schema(type.values, visited)
+ value_schema = type_to_json_schema_internal(type.values, visited, definitions)
  key_type_desc = type.keys.respond_to?(:raw_type) ? type.keys.raw_type.to_s : "string"
  value_type_desc = value_schema[:description] || value_schema[:type].to_s

@@ -129,9 +154,9 @@ module DSPy
  # Handle fixed hashes (from type aliases like { "key" => Type })
  properties = {}
  required = []
-
+
  type.types.each do |key, value_type|
- properties[key] = self.type_to_json_schema(value_type, visited)
+ properties[key] = type_to_json_schema_internal(value_type, visited, definitions)
  required << key
  end

@@ -155,9 +180,9 @@ module DSPy
  !(t.respond_to?(:raw_type) && t.raw_type == NilClass) &&
  !(t.respond_to?(:name) && t.name == "NilClass")
  end
-
+
  if non_nil_type
- base_schema = self.type_to_json_schema(non_nil_type, visited)
+ base_schema = type_to_json_schema_internal(non_nil_type, visited, definitions)
  if base_schema[:type].is_a?(String)
  # Convert single type to array with null
  { type: [base_schema[:type], "null"] }.merge(base_schema.except(:type))
@@ -173,13 +198,13 @@ module DSPy
  # Generate oneOf schema for all types
  if type.respond_to?(:types) && type.types.length > 1
  {
- oneOf: type.types.map { |t| self.type_to_json_schema(t, visited) },
+ oneOf: type.types.map { |t| type_to_json_schema_internal(t, visited, definitions) },
  description: "Union of multiple types"
  }
  else
  # Single type or fallback
  first_type = type.respond_to?(:types) ? type.types.first : type
- self.type_to_json_schema(first_type, visited)
+ type_to_json_schema_internal(first_type, visited, definitions)
  end
  end
  elsif type.is_a?(T::Types::Union)
@@ -200,7 +225,7 @@ module DSPy

  if non_nil_types.size == 1 && is_nilable
  # This is T.nilable(SomeType) - generate proper schema with null allowed
- base_schema = self.type_to_json_schema(non_nil_types.first, visited)
+ base_schema = type_to_json_schema_internal(non_nil_types.first, visited, definitions)
  if base_schema[:type].is_a?(String)
  # Convert single type to array with null
  { type: [base_schema[:type], "null"] }.merge(base_schema.except(:type))
@@ -210,11 +235,11 @@ module DSPy
  end
  elsif non_nil_types.size == 1
  # Non-nilable single type union (shouldn't happen in practice)
- self.type_to_json_schema(non_nil_types.first, visited)
+ type_to_json_schema_internal(non_nil_types.first, visited, definitions)
  elsif non_nil_types.size > 1
  # Handle complex unions with oneOf for better JSON schema compliance
  base_schema = {
- oneOf: non_nil_types.map { |t| self.type_to_json_schema(t, visited) },
+ oneOf: non_nil_types.map { |t| type_to_json_schema_internal(t, visited, definitions) },
  description: "Union of multiple types"
  }
  if is_nilable
@@ -237,12 +262,31 @@ module DSPy
  end

  # Generate JSON schema for custom T::Struct classes
+ # For backward compatibility, this returns just the schema hash
  sig { params(struct_class: T.class_of(T::Struct), visited: T.nilable(T::Set[T.untyped])).returns(T::Hash[Symbol, T.untyped]) }
  def self.generate_struct_schema(struct_class, visited = nil)
  visited ||= Set.new
-
+ generate_struct_schema_internal(struct_class, visited, {})
+ end
+
+ # Generate JSON schema with $defs tracking
+ # Returns a SchemaResult with schema and accumulated definitions
+ sig { params(struct_class: T.class_of(T::Struct), visited: T.nilable(T::Set[T.untyped]), definitions: T.nilable(T::Hash[String, T::Hash[Symbol, T.untyped]])).returns(SchemaResult) }
+ def self.generate_struct_schema_with_defs(struct_class, visited = nil, definitions = nil)
+ visited ||= Set.new
+ definitions ||= {}
+ schema = generate_struct_schema_internal(struct_class, visited, definitions)
+ SchemaResult.new(schema: schema, definitions: definitions)
+ end
+
+ # Internal implementation that tracks definitions for $defs
+ sig { params(struct_class: T.class_of(T::Struct), visited: T::Set[T.untyped], definitions: T::Hash[String, T::Hash[Symbol, T.untyped]]).returns(T::Hash[Symbol, T.untyped]) }
+ def self.generate_struct_schema_internal(struct_class, visited, definitions)
  return { type: "string", description: "Struct (schema introspection not available)" } unless struct_class.respond_to?(:props)

+ struct_name = struct_class.name || "Struct#{format('%x', struct_class.object_id)}"
+ simple_name = struct_name.split('::').last || struct_name
+
  # Add this struct to visited set to detect recursion
  visited.add(struct_class)

@@ -255,9 +299,6 @@ module DSPy
  "DSPy uses _type for automatic type detection in union types."
  end

- struct_name = struct_class.name || "Struct#{format('%x', struct_class.object_id)}"
- simple_name = struct_name.split('::').last || struct_name
-
  # Add automatic _type field for type detection
  properties[:_type] = {
  type: "string",
@@ -265,10 +306,20 @@ module DSPy
  }
  required << "_type"

+ # Get field descriptions if the struct supports them (via DSPy::Ext::StructDescriptions)
+ field_descs = struct_class.respond_to?(:field_descriptions) ? struct_class.field_descriptions : {}
+
  struct_class.props.each do |prop_name, prop_info|
  prop_type = prop_info[:type_object] || prop_info[:type]
- properties[prop_name] = self.type_to_json_schema(prop_type, visited)
-
+ prop_schema = type_to_json_schema_internal(prop_type, visited, definitions)
+
+ # Add field description if available
+ if field_descs[prop_name]
+ prop_schema[:description] = field_descs[prop_name]
+ end
+
+ properties[prop_name] = prop_schema
+
  # A field is required if it's not fully optional
  # fully_optional is true for nilable prop fields
  # immutable const fields are required unless nilable
@@ -280,12 +331,18 @@ module DSPy
  # Remove this struct from visited set after processing
  visited.delete(struct_class)

- {
+ schema = {
  type: "object",
  properties: properties,
  required: required,
  description: "#{struct_name} struct"
  }
+
+ # Add this struct's schema to definitions for $defs
+ # This allows recursive references to be resolved
+ definitions[simple_name] = schema
+
+ schema
  end

  private
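
The net effect of these changes: recursive struct references now point at `#/$defs/...` instead of `#/definitions/...`, and every struct schema generated along the way is accumulated in a definitions hash that callers can attach at the schema root. A sketch with an illustrative recursive struct (only the `generate_struct_schema_with_defs`/`SchemaResult` API comes from this diff; the struct itself is made up):

```ruby
class TreeNode < T::Struct
  const :label,    String
  const :children, T::Array[TreeNode], default: []
end

result = DSPy::TypeSystem::SorbetJsonSchema.generate_struct_schema_with_defs(TreeNode)

# The self-reference is emitted as a $defs pointer...
result.schema[:properties][:children][:items]
# => { "$ref" => "#/$defs/TreeNode" }

# ...and the struct's own schema is collected so the pointer can be resolved.
result.definitions.keys
# => ["TreeNode"]
```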
data/lib/dspy/scores/data_type.rb ADDED
@@ -0,0 +1,30 @@
+ # frozen_string_literal: true
+
+ require 'sorbet-runtime'
+
+ module DSPy
+ module Scores
+ # Langfuse score data types
+ # Maps to: NUMERIC, BOOLEAN, CATEGORICAL
+ class DataType < T::Enum
+ extend T::Sig
+
+ enums do
+ Numeric = new('NUMERIC')
+ Boolean = new('BOOLEAN')
+ Categorical = new('CATEGORICAL')
+ end
+
+ sig { params(value: String).returns(DataType) }
+ def self.deserialize(value)
+ case value
+ when 'NUMERIC' then Numeric
+ when 'BOOLEAN' then Boolean
+ when 'CATEGORICAL' then Categorical
+ else
+ raise ArgumentError, "Unknown DataType: #{value}"
+ end
+ end
+ end
+ end
+ end
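
For reference, the enum round-trips through the string values Langfuse expects; a quick sketch based on the code above:

```ruby
DSPy::Scores::DataType::Numeric.serialize      # => "NUMERIC"
DSPy::Scores::DataType.deserialize('BOOLEAN')  # => DSPy::Scores::DataType::Boolean
DSPy::Scores::DataType.deserialize('OTHER')    # raises ArgumentError
```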
data/lib/dspy/scores/evaluators.rb ADDED
@@ -0,0 +1,279 @@
+ # frozen_string_literal: true
+
+ require 'sorbet-runtime'
+ require 'json'
+
+ module DSPy
+ module Scores
+ # Built-in evaluators for common evaluation patterns
+ # Each evaluator returns a ScoreEvent that can be exported to Langfuse
+ module Evaluators
+ extend T::Sig
+
+ # Exact string match evaluator
+ # Returns 1.0 if output exactly matches expected, 0.0 otherwise
+ sig do
+ params(
+ output: String,
+ expected: String,
+ name: String,
+ ignore_case: T::Boolean,
+ comment: T.nilable(String),
+ trace_id: T.nilable(String),
+ observation_id: T.nilable(String),
+ emit: T::Boolean
+ ).returns(ScoreEvent)
+ end
+ def self.exact_match(
+ output:,
+ expected:,
+ name: 'exact_match',
+ ignore_case: false,
+ comment: nil,
+ trace_id: nil,
+ observation_id: nil,
+ emit: true
+ )
+ match = if ignore_case
+ output.downcase == expected.downcase
+ else
+ output == expected
+ end
+
+ DSPy::Scores.create(
+ name: name,
+ value: match ? 1.0 : 0.0,
+ data_type: DataType::Numeric,
+ comment: comment || (match ? 'Exact match' : 'No match'),
+ trace_id: trace_id,
+ observation_id: observation_id,
+ emit: emit
+ )
+ end
+
+ # Substring containment evaluator
+ # Returns 1.0 if output contains expected, 0.0 otherwise
+ sig do
+ params(
+ output: String,
+ expected: String,
+ name: String,
+ ignore_case: T::Boolean,
+ comment: T.nilable(String),
+ trace_id: T.nilable(String),
+ observation_id: T.nilable(String),
+ emit: T::Boolean
+ ).returns(ScoreEvent)
+ end
+ def self.contains(
+ output:,
+ expected:,
+ name: 'contains',
+ ignore_case: false,
+ comment: nil,
+ trace_id: nil,
+ observation_id: nil,
+ emit: true
+ )
+ match = if ignore_case
+ output.downcase.include?(expected.downcase)
+ else
+ output.include?(expected)
+ end
+
+ DSPy::Scores.create(
+ name: name,
+ value: match ? 1.0 : 0.0,
+ data_type: DataType::Numeric,
+ comment: comment || (match ? 'Contains expected' : 'Does not contain expected'),
+ trace_id: trace_id,
+ observation_id: observation_id,
+ emit: emit
+ )
+ end
+
+ # Regular expression match evaluator
+ # Returns 1.0 if output matches pattern, 0.0 otherwise
+ sig do
+ params(
+ output: String,
+ pattern: T.any(Regexp, String),
+ name: String,
+ comment: T.nilable(String),
+ trace_id: T.nilable(String),
+ observation_id: T.nilable(String),
+ emit: T::Boolean
+ ).returns(ScoreEvent)
+ end
+ def self.regex_match(
+ output:,
+ pattern:,
+ name: 'regex_match',
+ comment: nil,
+ trace_id: nil,
+ observation_id: nil,
+ emit: true
+ )
+ regex = pattern.is_a?(Regexp) ? pattern : Regexp.new(pattern)
+ match = regex.match?(output)
+
+ DSPy::Scores.create(
+ name: name,
+ value: match ? 1.0 : 0.0,
+ data_type: DataType::Numeric,
+ comment: comment || (match ? 'Regex matched' : 'Regex did not match'),
+ trace_id: trace_id,
+ observation_id: observation_id,
+ emit: emit
+ )
+ end
+
+ # Length check evaluator
+ # Returns 1.0 if output length is within range, 0.0 otherwise
+ sig do
+ params(
+ output: String,
+ min_length: T.nilable(Integer),
+ max_length: T.nilable(Integer),
+ name: String,
+ comment: T.nilable(String),
+ trace_id: T.nilable(String),
+ observation_id: T.nilable(String),
+ emit: T::Boolean
+ ).returns(ScoreEvent)
+ end
+ def self.length_check(
+ output:,
+ min_length: nil,
+ max_length: nil,
+ name: 'length_check',
+ comment: nil,
+ trace_id: nil,
+ observation_id: nil,
+ emit: true
+ )
+ length = output.length
+ valid = true
+ valid = false if min_length && length < min_length
+ valid = false if max_length && length > max_length
+
+ DSPy::Scores.create(
+ name: name,
+ value: valid ? 1.0 : 0.0,
+ data_type: DataType::Numeric,
+ comment: comment || "Length: #{length} (min: #{min_length || 'none'}, max: #{max_length || 'none'})",
+ trace_id: trace_id,
+ observation_id: observation_id,
+ emit: emit
+ )
+ end
+
+ # Levenshtein similarity evaluator
+ # Returns normalized similarity score between 0.0 and 1.0
+ sig do
+ params(
+ output: String,
+ expected: String,
+ name: String,
+ comment: T.nilable(String),
+ trace_id: T.nilable(String),
+ observation_id: T.nilable(String),
+ emit: T::Boolean
+ ).returns(ScoreEvent)
+ end
+ def self.similarity(
+ output:,
+ expected:,
+ name: 'similarity',
+ comment: nil,
+ trace_id: nil,
+ observation_id: nil,
+ emit: true
+ )
+ distance = levenshtein_distance(output, expected)
+ max_length = [output.length, expected.length].max
+ score = max_length.zero? ? 1.0 : 1.0 - (distance.to_f / max_length)
+
+ DSPy::Scores.create(
+ name: name,
+ value: score.round(4),
+ data_type: DataType::Numeric,
+ comment: comment || "Levenshtein distance: #{distance}",
+ trace_id: trace_id,
+ observation_id: observation_id,
+ emit: emit
+ )
+ end
+
+ # JSON validity evaluator
+ # Returns 1.0 if output is valid JSON, 0.0 otherwise
+ sig do
+ params(
+ output: String,
+ name: String,
+ comment: T.nilable(String),
+ trace_id: T.nilable(String),
+ observation_id: T.nilable(String),
+ emit: T::Boolean
+ ).returns(ScoreEvent)
+ end
+ def self.json_valid(
+ output:,
+ name: 'json_valid',
+ comment: nil,
+ trace_id: nil,
+ observation_id: nil,
+ emit: true
+ )
+ valid = begin
+ JSON.parse(output)
+ true
+ rescue JSON::ParserError
+ false
+ end
+
+ DSPy::Scores.create(
+ name: name,
+ value: valid ? 1.0 : 0.0,
+ data_type: DataType::Numeric,
+ comment: comment || (valid ? 'Valid JSON' : 'Invalid JSON'),
+ trace_id: trace_id,
+ observation_id: observation_id,
+ emit: emit
+ )
+ end
+
+ # Levenshtein distance implementation
+ sig { params(str1: String, str2: String).returns(Integer) }
+ def self.levenshtein_distance(str1, str2)
+ m = str1.length
+ n = str2.length
+
+ return n if m.zero?
+ return m if n.zero?
+
+ # Create distance matrix
+ d = Array.new(m + 1) { Array.new(n + 1, 0) }
+
+ # Initialize first column
+ (0..m).each { |i| d[i][0] = i }
+ # Initialize first row
+ (0..n).each { |j| d[0][j] = j }
+
+ # Fill in the rest of the matrix
+ (1..m).each do |i|
+ (1..n).each do |j|
+ cost = str1[i - 1] == str2[j - 1] ? 0 : 1
+ d[i][j] = [
+ d[i - 1][j] + 1, # deletion
+ d[i][j - 1] + 1, # insertion
+ d[i - 1][j - 1] + cost # substitution
+ ].min
+ end
+ end
+
+ d[m][n]
+ end
+ end
+ end
+ end
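
Each evaluator builds its result through `DSPy::Scores.create`, so calling one both returns a `ScoreEvent` and, unless `emit: false` is passed, publishes a `score.create` event. An illustrative sketch (inputs and the resulting values are examples, not fixtures from the gem):

```ruby
event = DSPy::Scores::Evaluators.exact_match(output: 'Paris', expected: 'paris', ignore_case: true)
event.value    # => 1.0
event.comment  # => "Exact match"

DSPy::Scores::Evaluators.similarity(output: 'kitten', expected: 'sitting').value
# => 0.5714  (Levenshtein distance 3 over max length 7, rounded to 4 places)

DSPy::Scores::Evaluators.json_valid(output: '{"ok": true}', emit: false).value
# => 1.0     (emit: false builds the event without publishing score.create)
```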
data/lib/dspy/scores/score_event.rb ADDED
@@ -0,0 +1,56 @@
+ # frozen_string_literal: true
+
+ require 'sorbet-runtime'
+ require 'securerandom'
+ require_relative 'data_type'
+
+ module DSPy
+ module Scores
+ # Represents a score to be sent to Langfuse
+ # Immutable struct with all score attributes
+ class ScoreEvent < T::Struct
+ extend T::Sig
+
+ # Unique identifier for the score (idempotency key)
+ prop :id, String, factory: -> { SecureRandom.uuid }
+
+ # Score name/identifier (required)
+ prop :name, String
+
+ # Score value - numeric, boolean (0/1), or categorical (string)
+ prop :value, T.any(Numeric, String)
+
+ # Data type for the score
+ prop :data_type, DataType, default: DataType::Numeric
+
+ # Optional human-readable comment
+ prop :comment, T.nilable(String), default: nil
+
+ # Trace ID to link the score to (required for Langfuse)
+ prop :trace_id, T.nilable(String), default: nil
+
+ # Observation/span ID to link the score to (optional)
+ prop :observation_id, T.nilable(String), default: nil
+
+ # Timestamp when the score was created
+ prop :timestamp, Time, factory: -> { Time.now }
+
+ # Serialize to Langfuse API payload format
+ sig { returns(T::Hash[Symbol, T.untyped]) }
+ def to_langfuse_payload
+ payload = {
+ id: id,
+ name: name,
+ value: value,
+ dataType: data_type.serialize
+ }
+
+ payload[:comment] = comment if comment
+ payload[:traceId] = trace_id if trace_id
+ payload[:observationId] = observation_id if observation_id
+
+ payload
+ end
+ end
+ end
+ end
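
A sketch of the payload shape `to_langfuse_payload` produces; the `id` is a generated UUID (shown here as a placeholder) and nil fields are omitted:

```ruby
event = DSPy::Scores::ScoreEvent.new(
  name: 'accuracy',
  value: 0.95,
  comment: 'Exact match',
  trace_id: 'trace-123'
)

event.to_langfuse_payload
# => { id: "<generated uuid>", name: "accuracy", value: 0.95,
#      dataType: "NUMERIC", comment: "Exact match", traceId: "trace-123" }
```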
data/lib/dspy/scores.rb ADDED
@@ -0,0 +1,135 @@
+ # frozen_string_literal: true
+
+ require_relative 'scores/data_type'
+ require_relative 'scores/score_event'
+ require_relative 'scores/evaluators'
+
+ module DSPy
+ # Score reporting for Langfuse integration
+ # Provides a simple API for creating and exporting evaluation scores
+ module Scores
+ extend T::Sig
+
+ class << self
+ extend T::Sig
+
+ # Create a score event from the current context
+ #
+ # @param name [String] Score identifier (e.g., "accuracy", "relevance")
+ # @param value [Numeric, String] Score value
+ # @param data_type [DataType] Type of score (default: Numeric)
+ # @param comment [String, nil] Optional human-readable comment
+ # @param span [Object, nil] Optional span to attach score to
+ # @param emit [Boolean] Whether to emit score.create event (default: true)
+ # @return [ScoreEvent] The created score event
+ sig do
+ params(
+ name: String,
+ value: T.any(Numeric, String),
+ data_type: DataType,
+ comment: T.nilable(String),
+ span: T.untyped,
+ trace_id: T.nilable(String),
+ observation_id: T.nilable(String),
+ emit: T::Boolean
+ ).returns(ScoreEvent)
+ end
+ def create(
+ name:,
+ value:,
+ data_type: DataType::Numeric,
+ comment: nil,
+ span: nil,
+ trace_id: nil,
+ observation_id: nil,
+ emit: true
+ )
+ # Extract trace_id from context if not provided
+ resolved_trace_id = trace_id || extract_trace_id_from_context
+ resolved_observation_id = observation_id || extract_observation_id_from_span(span)
+
+ event = ScoreEvent.new(
+ name: name,
+ value: value,
+ data_type: data_type,
+ comment: comment,
+ trace_id: resolved_trace_id,
+ observation_id: resolved_observation_id
+ )
+
+ # Emit score.create event for listeners and exporters
+ emit_score_event(event) if emit
+
+ event
+ end
+
+ private
+
+ sig { returns(T.nilable(String)) }
+ def extract_trace_id_from_context
+ return nil unless defined?(DSPy::Context)
+
+ DSPy::Context.current[:trace_id]
+ rescue StandardError
+ nil
+ end
+
+ sig { params(span: T.untyped).returns(T.nilable(String)) }
+ def extract_observation_id_from_span(span)
+ return nil unless span
+
+ if span.respond_to?(:context) && span.context.respond_to?(:span_id)
+ span.context.span_id
+ elsif span.respond_to?(:span_id)
+ span.span_id
+ end
+ rescue StandardError
+ nil
+ end
+
+ sig { params(event: ScoreEvent).void }
+ def emit_score_event(event)
+ return unless defined?(DSPy) && DSPy.respond_to?(:events)
+
+ DSPy.events.notify('score.create', {
+ score_id: event.id,
+ score_name: event.name,
+ score_value: event.value,
+ score_data_type: event.data_type.serialize,
+ score_comment: event.comment,
+ trace_id: event.trace_id,
+ observation_id: event.observation_id,
+ timestamp: event.timestamp.iso8601
+ })
+ rescue StandardError => e
+ DSPy.log('score.emit_error', error: e.message) if DSPy.respond_to?(:log)
+ end
+ end
+ end
+
+ # Top-level convenience method for creating scores
+ #
+ # @example Basic usage
+ # DSPy.score('accuracy', 0.95)
+ #
+ # @example With comment
+ # DSPy.score('accuracy', 0.95, comment: 'Exact match')
+ #
+ # @example Boolean score
+ # DSPy.score('is_valid', 1, data_type: DSPy::Scores::DataType::Boolean)
+ #
+ # @example Categorical score
+ # DSPy.score('sentiment', 'positive', data_type: DSPy::Scores::DataType::Categorical)
+ #
+ def self.score(name, value, data_type: Scores::DataType::Numeric, comment: nil, span: nil, trace_id: nil, observation_id: nil)
+ Scores.create(
+ name: name,
+ value: value,
+ data_type: data_type,
+ comment: comment,
+ span: span,
+ trace_id: trace_id,
+ observation_id: observation_id
+ )
+ end
+ end
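
Beyond the `DSPy.score` examples in the docstring above, `Scores.create` can be called directly when you want the event without publishing it, and the trace id is resolved from `DSPy::Context` when not passed explicitly. A usage sketch (the trace value is illustrative):

```ruby
DSPy.score('accuracy', 0.95, comment: 'Exact match')  # emits 'score.create'

event = DSPy::Scores.create(
  name: 'sentiment',
  value: 'positive',
  data_type: DSPy::Scores::DataType::Categorical,
  emit: false                                          # build only, nothing published
)
event.trace_id  # => taken from DSPy::Context.current[:trace_id], or nil outside a trace
```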
@@ -174,6 +174,35 @@ module DSPy
  }
  end

+ # Returns output JSON schema with accumulated $defs for recursive types
+ # This is needed for providers like OpenAI and Gemini that require $defs at the root
+ sig { returns(DSPy::TypeSystem::SorbetJsonSchema::SchemaResult) }
+ def output_json_schema_with_defs
+ properties = {}
+ required = []
+ all_definitions = {}
+
+ @output_field_descriptors&.each do |name, descriptor|
+ result = DSPy::TypeSystem::SorbetJsonSchema.type_to_json_schema_with_defs(descriptor.type, nil, all_definitions)
+ schema = result.schema
+ schema[:description] = descriptor.description if descriptor.description
+ properties[name] = schema
+ required << name.to_s unless descriptor.has_default
+ end
+
+ final_schema = {
+ "$schema": "http://json-schema.org/draft-06/schema#",
+ type: "object",
+ properties: properties,
+ required: required
+ }
+
+ DSPy::TypeSystem::SorbetJsonSchema::SchemaResult.new(
+ schema: final_schema,
+ definitions: all_definitions
+ )
+ end
+
  sig { returns(T.nilable(T.class_of(T::Struct))) }
  def output_schema
  @output_struct_class
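
A sketch of how a caller might consume the new `SchemaResult` to satisfy providers that expect `$defs` at the root. `MySignature` is a placeholder (and calling the method at the signature-class level is an assumption); the merge step is likewise an assumption about adapter usage, while `output_json_schema_with_defs` and `SchemaResult` come from this diff:

```ruby
result = MySignature.output_json_schema_with_defs

schema = result.schema
# Attach the accumulated struct definitions at the root, where $ref pointers expect them.
schema[:"$defs"] = result.definitions unless result.definitions.empty?
schema  # properties, required, and $defs ready for a structured-output request
```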
data/lib/dspy/version.rb CHANGED
@@ -1,5 +1,5 @@
  # frozen_string_literal: true

  module DSPy
- VERSION = "0.33.0"
+ VERSION = "0.34.1"
  end
data/lib/dspy.rb CHANGED
@@ -5,6 +5,9 @@ require 'dry-configurable'
  require 'dry/logger'
  require 'securerandom'

+ # Extensions to core classes (must be loaded early)
+ require_relative 'dspy/ext/struct_descriptions'
+
  require_relative 'dspy/version'
  require_relative 'dspy/errors'
  require_relative 'dspy/type_serializer'
@@ -223,6 +226,7 @@ require_relative 'dspy/events/subscriber_mixin'
  require_relative 'dspy/chain_of_thought'
  require_relative 'dspy/re_act'
  require_relative 'dspy/evals'
+ require_relative 'dspy/scores'
  require_relative 'dspy/teleprompt/teleprompter'
  require_relative 'dspy/teleprompt/utils'
  require_relative 'dspy/teleprompt/data_handler'
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: dspy
  version: !ruby/object:Gem::Version
- version: 0.33.0
+ version: 0.34.1
  platform: ruby
  authors:
  - Vicente Reig Rincón de Arellano
@@ -174,6 +174,7 @@ files:
  - lib/dspy/events/subscribers.rb
  - lib/dspy/events/types.rb
  - lib/dspy/example.rb
+ - lib/dspy/ext/struct_descriptions.rb
  - lib/dspy/few_shot_example.rb
  - lib/dspy/field.rb
  - lib/dspy/image.rb
@@ -219,6 +220,10 @@ files:
  - lib/dspy/schema/sorbet_toon_adapter.rb
  - lib/dspy/schema/version.rb
  - lib/dspy/schema_adapters.rb
+ - lib/dspy/scores.rb
+ - lib/dspy/scores/data_type.rb
+ - lib/dspy/scores/evaluators.rb
+ - lib/dspy/scores/score_event.rb
  - lib/dspy/signature.rb
  - lib/dspy/storage/program_storage.rb
  - lib/dspy/storage/storage_manager.rb