ruby_llm-contract 0.4.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. checksums.yaml +4 -4
  2. data/.rubycritic.yml +8 -0
  3. data/.simplecov +22 -0
  4. data/CHANGELOG.md +19 -0
  5. data/Gemfile +2 -0
  6. data/Gemfile.lock +104 -2
  7. data/README.md +42 -2
  8. data/lib/ruby_llm/contract/concerns/context_helpers.rb +11 -10
  9. data/lib/ruby_llm/contract/concerns/deep_freeze.rb +13 -7
  10. data/lib/ruby_llm/contract/concerns/deep_symbolize.rb +15 -5
  11. data/lib/ruby_llm/contract/concerns/eval_host.rb +51 -7
  12. data/lib/ruby_llm/contract/contract/schema_validator/bound_rule.rb +85 -0
  13. data/lib/ruby_llm/contract/contract/schema_validator/enum_rule.rb +23 -0
  14. data/lib/ruby_llm/contract/contract/schema_validator/node.rb +70 -0
  15. data/lib/ruby_llm/contract/contract/schema_validator/object_rules.rb +66 -0
  16. data/lib/ruby_llm/contract/contract/schema_validator/scalar_rules.rb +22 -0
  17. data/lib/ruby_llm/contract/contract/schema_validator/schema_extractor.rb +23 -0
  18. data/lib/ruby_llm/contract/contract/schema_validator/type_rule.rb +30 -0
  19. data/lib/ruby_llm/contract/contract/schema_validator.rb +41 -266
  20. data/lib/ruby_llm/contract/contract/validator.rb +9 -0
  21. data/lib/ruby_llm/contract/eval/case_executor.rb +52 -0
  22. data/lib/ruby_llm/contract/eval/case_result_builder.rb +35 -0
  23. data/lib/ruby_llm/contract/eval/case_scorer.rb +66 -0
  24. data/lib/ruby_llm/contract/eval/evaluator/exact.rb +8 -6
  25. data/lib/ruby_llm/contract/eval/evaluator/proc_evaluator.rb +22 -10
  26. data/lib/ruby_llm/contract/eval/evaluator/regex.rb +11 -8
  27. data/lib/ruby_llm/contract/eval/expectation_evaluator.rb +26 -0
  28. data/lib/ruby_llm/contract/eval/prompt_diff.rb +39 -0
  29. data/lib/ruby_llm/contract/eval/prompt_diff_comparator.rb +116 -0
  30. data/lib/ruby_llm/contract/eval/prompt_diff_presenter.rb +99 -0
  31. data/lib/ruby_llm/contract/eval/prompt_diff_serializer.rb +23 -0
  32. data/lib/ruby_llm/contract/eval/report.rb +19 -191
  33. data/lib/ruby_llm/contract/eval/report_presenter.rb +65 -0
  34. data/lib/ruby_llm/contract/eval/report_stats.rb +65 -0
  35. data/lib/ruby_llm/contract/eval/report_storage.rb +107 -0
  36. data/lib/ruby_llm/contract/eval/runner.rb +30 -207
  37. data/lib/ruby_llm/contract/eval/step_expectation_applier.rb +67 -0
  38. data/lib/ruby_llm/contract/eval/step_result_normalizer.rb +39 -0
  39. data/lib/ruby_llm/contract/eval.rb +13 -0
  40. data/lib/ruby_llm/contract/pipeline/base.rb +10 -1
  41. data/lib/ruby_llm/contract/rspec/pass_eval.rb +84 -3
  42. data/lib/ruby_llm/contract/rspec.rb +5 -0
  43. data/lib/ruby_llm/contract/step/adapter_caller.rb +23 -0
  44. data/lib/ruby_llm/contract/step/base.rb +93 -38
  45. data/lib/ruby_llm/contract/step/dsl.rb +10 -0
  46. data/lib/ruby_llm/contract/step/input_validator.rb +34 -0
  47. data/lib/ruby_llm/contract/step/limit_checker.rb +11 -11
  48. data/lib/ruby_llm/contract/step/prompt_compiler.rb +33 -0
  49. data/lib/ruby_llm/contract/step/result.rb +3 -2
  50. data/lib/ruby_llm/contract/step/result_builder.rb +60 -0
  51. data/lib/ruby_llm/contract/step/retry_executor.rb +1 -0
  52. data/lib/ruby_llm/contract/step/runner.rb +46 -85
  53. data/lib/ruby_llm/contract/step/runner_config.rb +37 -0
  54. data/lib/ruby_llm/contract/step.rb +5 -0
  55. data/lib/ruby_llm/contract/version.rb +1 -1
  56. metadata +28 -1
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module Contract
5
+ class SchemaValidator
6
+ # Immutable validation context for one schema node and its current path.
7
+ class Node < Data.define(:value, :schema, :path)
8
+ def expected_type
9
+ schema[:type]&.to_s
10
+ end
11
+
12
+ def object_schema?
13
+ expected_type == "object" || schema.key?(:properties)
14
+ end
15
+
16
+ def hash?
17
+ value.is_a?(Hash)
18
+ end
19
+
20
+ def array?
21
+ value.is_a?(Array)
22
+ end
23
+
24
+ def numeric?
25
+ value.is_a?(Numeric)
26
+ end
27
+
28
+ def properties
29
+ schema[:properties] || {}
30
+ end
31
+
32
+ def required_fields
33
+ Array(schema[:required]).map(&:to_s)
34
+ end
35
+
36
+ def items_schema
37
+ schema[:items]
38
+ end
39
+
40
+ def key_present?(field)
41
+ symbolized = field.to_sym
42
+ value.key?(symbolized) || value.key?(field.to_s)
43
+ end
44
+
45
+ def field_value(field)
46
+ symbolized = field.to_sym
47
+ return value[symbolized] if value.key?(symbolized)
48
+
49
+ value[field.to_s]
50
+ end
51
+
52
+ def extra_keys
53
+ value.keys.map(&:to_s)
54
+ end
55
+
56
+ def qualify(field)
57
+ path ? "#{path}.#{field}" : field.to_s
58
+ end
59
+
60
+ def child(field, child_value, child_schema)
61
+ self.class.new(value: child_value, schema: child_schema, path: qualify(field))
62
+ end
63
+
64
+ def array_item(index, item, item_schema)
65
+ self.class.new(value: item, schema: item_schema, path: "#{path}[#{index}]")
66
+ end
67
+ end
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module Contract
5
+ class SchemaValidator
6
+ # Applies object-only validation rules to a schema node.
7
+ class ObjectRules
8
+ def initialize(errors)
9
+ @errors = errors
10
+ end
11
+
12
+ def validate(node)
13
+ validate_required_fields(node)
14
+ validate_properties(node) { |child| yield child }
15
+ validate_additional_properties(node)
16
+ end
17
+
18
+ private
19
+
20
+ def validate_required_fields(node)
21
+ node.required_fields.each do |field|
22
+ next if node.key_present?(field)
23
+
24
+ @errors << "missing required field: #{node.qualify(field)}"
25
+ end
26
+ end
27
+
28
+ def validate_properties(node)
29
+ required = node.required_fields
30
+
31
+ node.properties.each do |field, field_schema|
32
+ next unless node.key_present?(field)
33
+
34
+ value = node.field_value(field)
35
+ qualified = node.qualify(field)
36
+
37
+ if value.nil?
38
+ validate_nil_field(qualified, field_schema, required.include?(field.to_s))
39
+ next
40
+ end
41
+
42
+ yield node.child(field, value, field_schema)
43
+ end
44
+ end
45
+
46
+ def validate_nil_field(path, field_schema, required)
47
+ return unless required
48
+
49
+ expected_type = field_schema[:type] || "non-null"
50
+ @errors << "#{path}: expected #{expected_type}, got nil"
51
+ end
52
+
53
+ def validate_additional_properties(node)
54
+ return unless node.schema[:additionalProperties] == false
55
+
56
+ allowed_keys = node.properties.keys.map(&:to_s)
57
+ extra_keys = node.extra_keys.reject { |key| allowed_keys.include?(key) }
58
+
59
+ extra_keys.each do |extra_key|
60
+ @errors << "#{node.qualify(extra_key)}: additional property not allowed"
61
+ end
62
+ end
63
+ end
64
+ end
65
+ end
66
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module Contract
5
+ class SchemaValidator
6
+ # Applies scalar-only validation rules to a schema node.
7
+ class ScalarRules
8
+ def initialize(errors)
9
+ @rules = [
10
+ TypeRule.new(errors),
11
+ EnumRule.new(errors),
12
+ BoundRule.new(errors)
13
+ ]
14
+ end
15
+
16
+ def validate(node)
17
+ @rules.each { |rule| rule.validate(node) }
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module Contract
5
+ class SchemaValidator
6
+ class SchemaExtractor
7
+ include Concerns::DeepSymbolize
8
+
9
+ def call(schema)
10
+ schema_payload = schema.is_a?(Class) ? schema.new : schema
11
+ raw_schema = if schema_payload.respond_to?(:to_json_schema)
12
+ json_schema = schema_payload.to_json_schema
13
+ json_schema[:schema] || json_schema["schema"] || json_schema
14
+ else
15
+ schema
16
+ end
17
+
18
+ deep_symbolize(raw_schema)
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module Contract
5
+ class SchemaValidator
6
+ # Validates the declared JSON schema type for a node.
7
+ class TypeRule
8
+ def initialize(errors)
9
+ @errors = errors
10
+ end
11
+
12
+ def validate(node)
13
+ expected_type = node.expected_type
14
+ value = node.value
15
+ return unless expected_type
16
+ return if type_valid?(expected_type, value)
17
+
18
+ @errors << "#{node.path}: expected #{expected_type}, got #{value.class}"
19
+ end
20
+
21
+ private
22
+
23
+ def type_valid?(expected_type, value)
24
+ checker = SchemaValidator::TYPE_CHECKS[expected_type]
25
+ checker ? checker.call(value) : true
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -1,305 +1,80 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "schema_validator/node"
4
+ require_relative "schema_validator/schema_extractor"
5
+ require_relative "schema_validator/type_rule"
6
+ require_relative "schema_validator/enum_rule"
7
+ require_relative "schema_validator/bound_rule"
8
+ require_relative "schema_validator/scalar_rules"
9
+ require_relative "schema_validator/object_rules"
10
+
3
11
  module RubyLLM
4
12
  module Contract
5
13
  # Client-side validation of parsed output against an output_schema.
6
14
  # Checks required fields, enum constraints, number ranges, and nested objects.
7
15
  # This complements provider-side enforcement (with_schema) and catches
8
16
  # violations when using Test adapter or providers that ignore schemas.
9
- class SchemaValidator # rubocop:disable Metrics/ClassLength
10
- include Concerns::DeepSymbolize
11
-
12
- # Bundles field path, value, and constraints to reduce parameter passing
13
- FieldCheck = Struct.new(:qualified, :value, :constraints)
14
-
17
+ class SchemaValidator
15
18
  SIZE_BOUNDS = {
16
19
  string: { min_key: :minLength, max_key: :maxLength, metric: "length" },
17
20
  array: { min_key: :minItems, max_key: :maxItems, metric: "array length" }
18
21
  }.freeze
22
+ TYPE_CHECKS = {
23
+ "string" => ->(value) { value.is_a?(String) },
24
+ "integer" => ->(value) { value.is_a?(Integer) },
25
+ "number" => ->(value) { value.is_a?(Numeric) },
26
+ "boolean" => ->(value) { value.is_a?(TrueClass) || value.is_a?(FalseClass) },
27
+ "array" => ->(value) { value.is_a?(Array) },
28
+ "object" => ->(value) { value.is_a?(Hash) }
29
+ }.freeze
19
30
 
20
31
  def self.validate(parsed_output, schema)
21
32
  new(parsed_output, schema).validate
22
33
  end
23
34
 
24
35
  def initialize(parsed_output, schema)
25
- @output = parsed_output
26
- @json_schema = extract_schema(schema)
27
36
  @errors = []
37
+ json_schema = SchemaExtractor.new.call(schema)
38
+ path = root_object_schema?(json_schema) ? nil : "root"
39
+ @root_node = Node.new(value: parsed_output, schema: json_schema, path: path)
40
+ @scalar_rules = ScalarRules.new(@errors)
41
+ @object_rules = ObjectRules.new(@errors)
28
42
  end
29
43
 
30
44
  def validate
31
- return [] unless @json_schema.is_a?(Hash)
32
-
33
- return validate_non_hash_output unless @output.is_a?(Hash)
34
-
35
- validate_object(@output, @json_schema, prefix: nil)
36
- @errors
37
- end
38
-
39
- private
40
-
41
- def validate_non_hash_output
42
- expected_type = @json_schema[:type]&.to_s
43
-
44
- if expected_type == "object" || @json_schema.key?(:properties)
45
- return ["expected object, got #{@output.class}"]
46
- end
47
-
48
- errors = []
49
- validate_type_match(errors, @output, expected_type, "root") if expected_type
50
- validate_constraints(errors, @output, @json_schema, "root")
51
-
52
- if expected_type == "array" && @output.is_a?(Array) && @json_schema[:items]
53
- validate_array_items(errors, @output, @json_schema[:items], "")
54
- end
55
-
56
- errors
57
- end
58
-
59
- def validate_array_items(errors, array, items_schema, prefix)
60
- array.each_with_index do |item, i|
61
- item_prefix = "#{prefix}[#{i}]"
62
- validate_value(errors, item, items_schema, item_prefix)
63
- end
64
- end
65
-
66
- def validate_value(errors, value, schema, prefix)
67
- value_type = schema[:type]&.to_s
68
-
69
- validate_type_match(errors, value, value_type, prefix) if value_type
70
- validate_constraints(errors, value, schema, prefix)
71
-
72
- if value.is_a?(Hash) && (schema.key?(:properties) || value_type == "object")
73
- validate_object(value, schema, prefix: prefix)
74
- errors.concat(@errors)
75
- @errors = []
76
- elsif value.is_a?(Array) && schema[:items]
77
- validate_array_items(errors, value, schema[:items], prefix)
78
- end
79
- end
80
-
81
- def validate_type_match(errors, value, expected_type, prefix)
82
- valid = case expected_type
83
- when "string" then value.is_a?(String)
84
- when "integer" then value.is_a?(Integer)
85
- when "number" then value.is_a?(Numeric)
86
- when "boolean" then value.is_a?(TrueClass) || value.is_a?(FalseClass)
87
- when "array" then value.is_a?(Array)
88
- else true
89
- end
90
- errors << "#{prefix}: expected #{expected_type}, got #{value.class}" unless valid
91
- end
92
-
93
- def validate_constraints(errors, value, schema, prefix)
94
- if schema[:minimum] && value.is_a?(Numeric) && value < schema[:minimum]
95
- errors << "#{prefix}: #{value} is less than minimum #{schema[:minimum]}"
96
- end
97
- if schema[:maximum] && value.is_a?(Numeric) && value > schema[:maximum]
98
- errors << "#{prefix}: #{value} is greater than maximum #{schema[:maximum]}"
99
- end
100
- if schema[:enum] && !schema[:enum].include?(value)
101
- errors << "#{prefix}: #{value.inspect} is not in enum #{schema[:enum].inspect}"
102
- end
103
- if schema[:minItems] && value.is_a?(Array) && value.length < schema[:minItems]
104
- errors << "#{prefix}: array has #{value.length} items, minimum #{schema[:minItems]}"
105
- end
106
- if schema[:maxItems] && value.is_a?(Array) && value.length > schema[:maxItems]
107
- errors << "#{prefix}: array has #{value.length} items, maximum #{schema[:maxItems]}"
108
- end
109
- if schema[:minLength] && value.is_a?(String) && value.length < schema[:minLength]
110
- errors << "#{prefix}: string length #{value.length} is less than minLength #{schema[:minLength]}"
111
- end
112
- if schema[:maxLength] && value.is_a?(String) && value.length > schema[:maxLength]
113
- errors << "#{prefix}: string length #{value.length} is greater than maxLength #{schema[:maxLength]}"
114
- end
115
- end
116
-
117
- def extract_schema(schema)
118
- instance = schema.is_a?(Class) ? schema.new : schema
119
- json = if instance.respond_to?(:to_json_schema)
120
- schema_data = instance.to_json_schema
121
- schema_data[:schema] || schema_data["schema"] || schema_data
122
- else
123
- schema
124
- end
125
- deep_symbolize(json)
126
- end
127
-
128
- def validate_object(output, schema, prefix:)
129
- return unless output.is_a?(Hash) && schema.is_a?(Hash)
130
-
131
- properties = schema[:properties] || {}
132
- required = schema[:required] || []
133
-
134
- check_required(required, output, prefix: prefix)
135
- check_properties(properties, output, prefix: prefix, required_fields: required)
136
- check_additional_properties(output, schema, prefix: prefix)
137
- end
138
-
139
- def check_required(required, output, prefix:)
140
- required.each do |field|
141
- key = field.to_s.to_sym
142
- qualified = qualify(prefix, field)
143
- @errors << "missing required field: #{qualified}" unless output.key?(key)
144
- end
145
- end
146
-
147
- def check_properties(properties, output, prefix:, required_fields: [])
148
- required_syms = required_fields.map { |field| field.to_s.to_sym }
149
-
150
- properties.each do |field, constraints|
151
- key = field.to_sym
152
- value = output[key]
153
- qualified = qualify(prefix, field)
154
-
155
- if value.nil?
156
- check_nil_required(qualified, key, constraints, required_syms, output)
157
- next
158
- end
45
+ return [] unless @root_node.schema.is_a?(Hash)
159
46
 
160
- validate_field(FieldCheck.new(qualified: qualified, value: value, constraints: constraints))
161
- end
162
- end
163
-
164
- def check_nil_required(qualified, key, constraints, required_syms, output)
165
- return unless required_syms.include?(key) && output.key?(key)
166
-
167
- expected = constraints[:type] || "non-null"
168
- @errors << "#{qualified}: expected #{expected}, got nil"
169
- end
170
-
171
- def check_additional_properties(output, schema, prefix:)
172
- return unless schema[:additionalProperties] == false
173
-
174
- allowed_keys = (schema[:properties] || {}).keys.map { |prop_key| prop_key.to_s.to_sym }
175
- extra_keys = output.keys - allowed_keys
176
-
177
- extra_keys.each do |extra_key|
178
- @errors << "#{qualify(prefix, extra_key)}: additional property not allowed"
179
- end
180
- end
181
-
182
- def validate_field(field_check)
183
- check_enum(field_check)
184
- check_number_range(field_check)
185
- check_type_constraint(field_check)
186
- check_string_length(field_check)
187
- check_nested(field_check)
188
- end
189
-
190
- def check_enum(field_check)
191
- qualified, value, constraints = field_check.to_a
192
- enum = constraints[:enum]
193
- return unless enum
194
-
195
- @errors << "#{qualified}: #{value.inspect} is not in enum #{enum.inspect}" unless enum.include?(value)
196
- end
197
-
198
- def check_number_range(field_check)
199
- qualified, value, constraints = field_check.to_a
200
- return unless value.is_a?(Numeric)
201
-
202
- check_minimum(qualified, value, constraints[:minimum])
203
- check_maximum(qualified, value, constraints[:maximum])
204
- end
205
-
206
- def check_type_constraint(field_check)
207
- qualified, value, constraints = field_check.to_a
208
- expected_type = constraints[:type]&.to_s
209
- return unless expected_type
210
-
211
- @errors << "#{qualified}: expected #{expected_type}, got #{value.class}" unless type_valid?(expected_type,
212
- value)
213
- end
214
-
215
- def type_valid?(expected_type, value)
216
- case expected_type
217
- when "string" then value.is_a?(String)
218
- when "number" then value.is_a?(Numeric)
219
- when "integer" then value.is_a?(Integer)
220
- when "boolean" then [true, false].include?(value)
221
- when "array" then value.is_a?(Array)
222
- when "object" then value.is_a?(Hash)
223
- else true
224
- end
225
- end
226
-
227
- def check_nested(field_check)
228
- qualified, value, constraints = field_check.to_a
229
- nested_type = constraints[:type]&.to_s
230
-
231
- case nested_type
232
- when "object"
233
- validate_object(value, constraints, prefix: qualified) if value.is_a?(Hash)
234
- when "array"
235
- check_array_items(qualified, value, constraints) if value.is_a?(Array)
47
+ if @root_node.object_schema? && !@root_node.hash?
48
+ ["expected object, got #{@root_node.value.class}"]
49
+ else
50
+ validate_root
51
+ @errors
236
52
  end
237
53
  end
238
54
 
239
- def check_string_length(field_check)
240
- qualified, value, constraints = field_check.to_a
241
- check_size_bounds(qualified, value.length, constraints, :string) if value.is_a?(String)
242
- end
55
+ private
243
56
 
244
- def check_array_length(qualified, value, constraints)
245
- check_size_bounds(qualified, value.length, constraints, :array) if value.is_a?(Array)
57
+ def validate_root
58
+ validate_node(@root_node)
246
59
  end
247
60
 
248
- def check_size_bounds(qualified, actual, constraints, kind)
249
- bounds = SIZE_BOUNDS[kind]
250
- check_size_minimum(qualified, actual, constraints[bounds[:min_key]], bounds)
251
- check_size_maximum(qualified, actual, constraints[bounds[:max_key]], bounds)
61
+ def validate_node(node)
62
+ @scalar_rules.validate(node)
63
+ @object_rules.validate(node) { |child| validate_node(child) } if node.hash? && node.object_schema?
64
+ validate_array(node) if node.array?
252
65
  end
253
66
 
254
- def check_array_items(qualified, value, constraints)
255
- check_array_length(qualified, value, constraints)
256
-
257
- items_schema = constraints[:items]
67
+ def validate_array(node)
68
+ items_schema = node.items_schema
258
69
  return unless items_schema.is_a?(Hash)
259
70
 
260
- value.each_with_index do |item, idx|
261
- validate_array_item("#{qualified}[#{idx}]", item, items_schema)
71
+ node.value.each_with_index do |item, index|
72
+ validate_node(node.array_item(index, item, items_schema))
262
73
  end
263
74
  end
264
75
 
265
- def validate_array_item(item_key, item, items_schema)
266
- item_type = items_schema[:type]&.to_s
267
-
268
- if item_type == "object" && item.is_a?(Hash)
269
- validate_object(item, items_schema, prefix: item_key)
270
- elsif item_type == "array" && item.is_a?(Array)
271
- check_array_items(item_key, item, items_schema)
272
- else
273
- validate_field(FieldCheck.new(qualified: item_key, value: item, constraints: items_schema))
274
- end
275
- end
276
-
277
- def check_minimum(qualified, actual, limit)
278
- return unless limit && actual < limit
279
-
280
- @errors << "#{qualified}: #{actual} is below minimum #{limit}"
281
- end
282
-
283
- def check_maximum(qualified, actual, limit)
284
- return unless limit && actual > limit
285
-
286
- @errors << "#{qualified}: #{actual} is above maximum #{limit}"
287
- end
288
-
289
- def check_size_minimum(qualified, actual, limit, bounds)
290
- return unless limit && actual < limit
291
-
292
- @errors << "#{qualified}: #{bounds[:metric]} #{actual} is below #{bounds[:min_key]} #{limit}"
293
- end
294
-
295
- def check_size_maximum(qualified, actual, limit, bounds)
296
- return unless limit && actual > limit
297
-
298
- @errors << "#{qualified}: #{bounds[:metric]} #{actual} is above #{bounds[:max_key]} #{limit}"
299
- end
300
-
301
- def qualify(prefix, field)
302
- prefix ? "#{prefix}.#{field}" : field.to_s
76
+ def root_object_schema?(schema)
77
+ schema[:type]&.to_s == "object" || schema.key?(:properties)
303
78
  end
304
79
  end
305
80
  end
@@ -19,6 +19,15 @@ module RubyLLM
19
19
  input: input, schema: schema)
20
20
  end
21
21
 
22
+ def self.run_observations(observers, parsed_output, input: nil)
23
+ observers.map do |obs|
24
+ passed = obs.call(parsed_output, input: input)
25
+ { description: obs.description, passed: !!passed }
26
+ rescue StandardError => e
27
+ { description: obs.description, passed: false, error: "#{e.class}: #{e.message}" }
28
+ end
29
+ end
30
+
22
31
  private
23
32
 
24
33
  def parse_error?(parsed_output)
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module Contract
5
+ module Eval
6
+ class CaseExecutor
7
+ def initialize(step:, scorer: nil, normalizer: StepResultNormalizer.new,
8
+ result_builder: CaseResultBuilder.new,
9
+ step_expectation_applier: StepExpectationApplier.new)
10
+ @step = step
11
+ @scorer = scorer || CaseScorer.new(step: step)
12
+ @normalizer = normalizer
13
+ @result_builder = result_builder
14
+ @step_expectation_applier = step_expectation_applier
15
+ end
16
+
17
+ def call(test_case:, context:)
18
+ run_result = @step.run(test_case.input, context: context)
19
+ step_result = @normalizer.call(run_result)
20
+ evaluation = @scorer.call(test_case: test_case, step_result: step_result)
21
+ result = @result_builder.call(test_case: test_case, step_result: step_result, evaluation: evaluation)
22
+
23
+ @step_expectation_applier.call(result: result, run_result: run_result, test_case: test_case)
24
+ rescue RubyLLM::Contract::Error => error
25
+ raise unless missing_adapter?(error)
26
+
27
+ skipped_result(test_case, error.message)
28
+ end
29
+
30
+ private
31
+
32
+ def missing_adapter?(error)
33
+ error.message.include?("No adapter configured")
34
+ end
35
+
36
+ def skipped_result(test_case, reason)
37
+ CaseResult.new(
38
+ name: test_case.name,
39
+ input: test_case.input,
40
+ output: nil,
41
+ expected: test_case.expected,
42
+ step_status: :skipped,
43
+ score: 0.0,
44
+ passed: false,
45
+ label: "SKIP",
46
+ details: "skipped: #{reason}"
47
+ )
48
+ end
49
+ end
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module Contract
5
+ module Eval
6
+ class CaseResultBuilder
7
+ def call(test_case:, step_result:, evaluation:)
8
+ trace = step_result.respond_to?(:trace) ? step_result.trace : nil
9
+
10
+ CaseResult.new(
11
+ name: test_case.name,
12
+ input: test_case.input,
13
+ output: step_result.parsed_output,
14
+ expected: test_case.expected,
15
+ step_status: step_result.status,
16
+ score: evaluation.score,
17
+ passed: evaluation.passed,
18
+ label: evaluation.label,
19
+ details: evaluation.details,
20
+ duration_ms: trace_metric(trace, :total_latency_ms, :latency_ms),
21
+ cost: trace_metric(trace, :total_cost, :cost)
22
+ )
23
+ end
24
+
25
+ private
26
+
27
+ def trace_metric(trace, pipeline_key, step_key)
28
+ return nil unless trace
29
+
30
+ trace.respond_to?(pipeline_key) ? trace.public_send(pipeline_key) : trace[step_key]
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end