ruby_llm-contract 0.4.5 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubycritic.yml +8 -0
- data/.simplecov +22 -0
- data/CHANGELOG.md +19 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +104 -2
- data/README.md +42 -2
- data/lib/ruby_llm/contract/concerns/context_helpers.rb +11 -10
- data/lib/ruby_llm/contract/concerns/deep_freeze.rb +13 -7
- data/lib/ruby_llm/contract/concerns/deep_symbolize.rb +15 -5
- data/lib/ruby_llm/contract/concerns/eval_host.rb +51 -7
- data/lib/ruby_llm/contract/contract/schema_validator/bound_rule.rb +85 -0
- data/lib/ruby_llm/contract/contract/schema_validator/enum_rule.rb +23 -0
- data/lib/ruby_llm/contract/contract/schema_validator/node.rb +70 -0
- data/lib/ruby_llm/contract/contract/schema_validator/object_rules.rb +66 -0
- data/lib/ruby_llm/contract/contract/schema_validator/scalar_rules.rb +22 -0
- data/lib/ruby_llm/contract/contract/schema_validator/schema_extractor.rb +23 -0
- data/lib/ruby_llm/contract/contract/schema_validator/type_rule.rb +30 -0
- data/lib/ruby_llm/contract/contract/schema_validator.rb +41 -266
- data/lib/ruby_llm/contract/contract/validator.rb +9 -0
- data/lib/ruby_llm/contract/eval/case_executor.rb +52 -0
- data/lib/ruby_llm/contract/eval/case_result_builder.rb +35 -0
- data/lib/ruby_llm/contract/eval/case_scorer.rb +66 -0
- data/lib/ruby_llm/contract/eval/evaluator/exact.rb +8 -6
- data/lib/ruby_llm/contract/eval/evaluator/proc_evaluator.rb +22 -10
- data/lib/ruby_llm/contract/eval/evaluator/regex.rb +11 -8
- data/lib/ruby_llm/contract/eval/expectation_evaluator.rb +26 -0
- data/lib/ruby_llm/contract/eval/prompt_diff.rb +39 -0
- data/lib/ruby_llm/contract/eval/prompt_diff_comparator.rb +116 -0
- data/lib/ruby_llm/contract/eval/prompt_diff_presenter.rb +99 -0
- data/lib/ruby_llm/contract/eval/prompt_diff_serializer.rb +23 -0
- data/lib/ruby_llm/contract/eval/report.rb +19 -191
- data/lib/ruby_llm/contract/eval/report_presenter.rb +65 -0
- data/lib/ruby_llm/contract/eval/report_stats.rb +65 -0
- data/lib/ruby_llm/contract/eval/report_storage.rb +107 -0
- data/lib/ruby_llm/contract/eval/runner.rb +30 -207
- data/lib/ruby_llm/contract/eval/step_expectation_applier.rb +67 -0
- data/lib/ruby_llm/contract/eval/step_result_normalizer.rb +39 -0
- data/lib/ruby_llm/contract/eval.rb +13 -0
- data/lib/ruby_llm/contract/pipeline/base.rb +10 -1
- data/lib/ruby_llm/contract/rspec/pass_eval.rb +84 -3
- data/lib/ruby_llm/contract/rspec.rb +5 -0
- data/lib/ruby_llm/contract/step/adapter_caller.rb +23 -0
- data/lib/ruby_llm/contract/step/base.rb +93 -38
- data/lib/ruby_llm/contract/step/dsl.rb +10 -0
- data/lib/ruby_llm/contract/step/input_validator.rb +34 -0
- data/lib/ruby_llm/contract/step/limit_checker.rb +11 -11
- data/lib/ruby_llm/contract/step/prompt_compiler.rb +33 -0
- data/lib/ruby_llm/contract/step/result.rb +3 -2
- data/lib/ruby_llm/contract/step/result_builder.rb +60 -0
- data/lib/ruby_llm/contract/step/retry_executor.rb +1 -0
- data/lib/ruby_llm/contract/step/runner.rb +46 -85
- data/lib/ruby_llm/contract/step/runner_config.rb +37 -0
- data/lib/ruby_llm/contract/step.rb +5 -0
- data/lib/ruby_llm/contract/version.rb +1 -1
- metadata +28 -1
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
class SchemaValidator
|
|
6
|
+
# Immutable validation context for one schema node and its current path.
|
|
7
|
+
class Node < Data.define(:value, :schema, :path)
  # Declared schema type rendered as a String; nil when the schema has no :type.
  def expected_type
    declared = schema[:type]
    declared&.to_s
  end

  # True when the schema declares type "object" or carries a :properties key.
  def object_schema?
    return true if expected_type == "object"

    schema.key?(:properties)
  end

  # Runtime-shape predicates for the wrapped value.
  def hash? = value.is_a?(Hash)

  def array? = value.is_a?(Array)

  def numeric? = value.is_a?(Numeric)

  # Property schemas declared on this node; an empty Hash when none are declared.
  def properties = schema[:properties] || {}

  # Names listed under :required, each converted to a String.
  def required_fields
    Array(schema[:required]).map { |name| name.to_s }
  end

  # Schema under :items, or nil when absent.
  def items_schema = schema[:items]

  # True when the value has +field+ under either its Symbol or its String form.
  def key_present?(field)
    [field.to_sym, field.to_s].any? { |candidate| value.key?(candidate) }
  end

  # Looks the field up by Symbol key first and falls back to the String key.
  def field_value(field)
    symbol_key = field.to_sym
    value.key?(symbol_key) ? value[symbol_key] : value[field.to_s]
  end

  # Every key of the value as a String (callers filter out the declared ones).
  def extra_keys = value.each_key.map(&:to_s)

  # Dotted path for +field+ beneath this node; just the field name when path is nil.
  def qualify(field)
    return field.to_s unless path

    "#{path}.#{field}"
  end

  # New node for a property of this node, with the path extended by +field+.
  def child(field, child_value, child_schema)
    nested_path = qualify(field)
    self.class.new(value: child_value, schema: child_schema, path: nested_path)
  end

  # New node for the array element at +index+, with "[index]" appended to the path.
  def array_item(index, item, item_schema)
    indexed_path = "#{path}[#{index}]"
    self.class.new(value: item, schema: item_schema, path: indexed_path)
  end
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
class SchemaValidator
|
|
6
|
+
# Applies object-only validation rules to a schema node.
|
|
7
|
+
class ObjectRules
  # @param errors [Array<String>] shared list that violation messages are appended to
  def initialize(errors)
    @errors = errors
  end

  # Runs the required-field, property and additionalProperties checks in that order.
  # The given block receives a child node for every declared property that is
  # present with a non-nil value.
  def validate(node, &visit_child)
    validate_required_fields(node)
    validate_properties(node, &visit_child)
    validate_additional_properties(node)
  end

  private

  # Records one message for each required field whose key is absent.
  def validate_required_fields(node)
    node.required_fields.each do |name|
      @errors << "missing required field: #{node.qualify(name)}" unless node.key_present?(name)
    end
  end

  # Walks the declared properties: absent keys are skipped, nil values go to the
  # nil check, everything else is yielded as a child node.
  def validate_properties(node)
    required_names = node.required_fields

    node.properties.each do |name, property_schema|
      next unless node.key_present?(name)

      current = node.field_value(name)
      dotted = node.qualify(name)

      if current.nil?
        validate_nil_field(dotted, property_schema, required_names.include?(name.to_s))
      else
        yield node.child(name, current, property_schema)
      end
    end
  end

  # A nil value is only reported when the field is required.
  def validate_nil_field(path, field_schema, required)
    @errors << "#{path}: expected #{field_schema[:type] || 'non-null'}, got nil" if required
  end

  # Only enforced when the schema sets additionalProperties to exactly false.
  def validate_additional_properties(node)
    return if node.schema[:additionalProperties] != false

    declared = node.properties.keys.map(&:to_s)

    (node.extra_keys - declared).each do |unexpected|
      @errors << "#{node.qualify(unexpected)}: additional property not allowed"
    end
  end
end
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
class SchemaValidator
|
|
6
|
+
# Applies scalar-only validation rules to a schema node.
|
|
7
|
+
class ScalarRules
  # Builds one instance of each rule, all sharing the same +errors+ list.
  # The order (type, enum, bound) is the order the rules run in.
  def initialize(errors)
    @rules = [TypeRule, EnumRule, BoundRule].map { |rule_class| rule_class.new(errors) }
  end

  # Hands +node+ to every rule in turn.
  def validate(node)
    @rules.each do |rule|
      rule.validate(node)
    end
  end
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
class SchemaValidator
|
|
6
|
+
# Turns whatever was supplied as a schema (a class, an instance, or a plain
# structure) into the form the validator reads, by passing it through
# deep_symbolize from Concerns::DeepSymbolize (defined outside this file).
class SchemaExtractor
  include Concerns::DeepSymbolize

  # @param schema [Object] a schema class, a schema instance, or a raw schema structure
  # @return the deep_symbolize result for the extracted raw schema
  def call(schema)
    deep_symbolize(raw_schema_for(schema))
  end

  private

  # A Class is instantiated first. When the candidate responds to
  # to_json_schema, the value under :schema / "schema" is preferred over the
  # whole export; otherwise the argument is returned exactly as given.
  def raw_schema_for(schema)
    candidate = schema.is_a?(Class) ? schema.new : schema
    return schema unless candidate.respond_to?(:to_json_schema)

    exported = candidate.to_json_schema
    exported[:schema] || exported["schema"] || exported
  end
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
class SchemaValidator
|
|
6
|
+
# Validates the declared JSON schema type for a node.
|
|
7
|
+
class TypeRule
  # @param errors [Array<String>] shared list that mismatch messages are appended to
  def initialize(errors)
    @errors = errors
  end

  # Appends "<path>: expected <type>, got <Class>" when the node's value does
  # not satisfy its declared type. Nodes without a declared type are ignored.
  def validate(node)
    declared = node.expected_type
    actual = node.value
    mismatch = declared && !matches?(declared, actual)

    @errors << "#{node.path}: expected #{declared}, got #{actual.class}" if mismatch
  end

  private

  # Types with no entry in SchemaValidator::TYPE_CHECKS are treated as satisfied.
  def matches?(declared, actual)
    check = SchemaValidator::TYPE_CHECKS[declared]
    return true unless check

    check.call(actual)
  end
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
@@ -1,305 +1,80 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "schema_validator/node"
|
|
4
|
+
require_relative "schema_validator/schema_extractor"
|
|
5
|
+
require_relative "schema_validator/type_rule"
|
|
6
|
+
require_relative "schema_validator/enum_rule"
|
|
7
|
+
require_relative "schema_validator/bound_rule"
|
|
8
|
+
require_relative "schema_validator/scalar_rules"
|
|
9
|
+
require_relative "schema_validator/object_rules"
|
|
10
|
+
|
|
3
11
|
module RubyLLM
|
|
4
12
|
module Contract
|
|
5
13
|
# Client-side validation of parsed output against an output_schema.
|
|
6
14
|
# Checks required fields, enum constraints, number ranges, and nested objects.
|
|
7
15
|
# This complements provider-side enforcement (with_schema) and catches
|
|
8
16
|
# violations when using Test adapter or providers that ignore schemas.
|
|
9
|
-
class SchemaValidator
|
|
10
|
-
include Concerns::DeepSymbolize
|
|
11
|
-
|
|
12
|
-
# Bundles field path, value, and constraints to reduce parameter passing
|
|
13
|
-
FieldCheck = Struct.new(:qualified, :value, :constraints)
|
|
14
|
-
|
|
17
|
+
class SchemaValidator
|
|
15
18
|
SIZE_BOUNDS = {
|
|
16
19
|
string: { min_key: :minLength, max_key: :maxLength, metric: "length" },
|
|
17
20
|
array: { min_key: :minItems, max_key: :maxItems, metric: "array length" }
|
|
18
21
|
}.freeze
|
|
22
|
+
# Maps a schema type name to a one-argument lambda that reports whether a
# value is an instance of one of the Ruby classes accepted for that type.
TYPE_CHECKS = {
  "string" => [String],
  "integer" => [Integer],
  "number" => [Numeric],
  "boolean" => [TrueClass, FalseClass],
  "array" => [Array],
  "object" => [Hash]
}.transform_values do |accepted|
  ->(value) { accepted.any? { |klass| value.is_a?(klass) } }
end.freeze
|
|
19
30
|
|
|
20
31
|
def self.validate(parsed_output, schema)
|
|
21
32
|
new(parsed_output, schema).validate
|
|
22
33
|
end
|
|
23
34
|
|
|
24
35
|
def initialize(parsed_output, schema)
|
|
25
|
-
@output = parsed_output
|
|
26
|
-
@json_schema = extract_schema(schema)
|
|
27
36
|
@errors = []
|
|
37
|
+
json_schema = SchemaExtractor.new.call(schema)
|
|
38
|
+
path = root_object_schema?(json_schema) ? nil : "root"
|
|
39
|
+
@root_node = Node.new(value: parsed_output, schema: json_schema, path: path)
|
|
40
|
+
@scalar_rules = ScalarRules.new(@errors)
|
|
41
|
+
@object_rules = ObjectRules.new(@errors)
|
|
28
42
|
end
|
|
29
43
|
|
|
30
44
|
def validate
|
|
31
|
-
return [] unless @
|
|
32
|
-
|
|
33
|
-
return validate_non_hash_output unless @output.is_a?(Hash)
|
|
34
|
-
|
|
35
|
-
validate_object(@output, @json_schema, prefix: nil)
|
|
36
|
-
@errors
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
private
|
|
40
|
-
|
|
41
|
-
def validate_non_hash_output
|
|
42
|
-
expected_type = @json_schema[:type]&.to_s
|
|
43
|
-
|
|
44
|
-
if expected_type == "object" || @json_schema.key?(:properties)
|
|
45
|
-
return ["expected object, got #{@output.class}"]
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
errors = []
|
|
49
|
-
validate_type_match(errors, @output, expected_type, "root") if expected_type
|
|
50
|
-
validate_constraints(errors, @output, @json_schema, "root")
|
|
51
|
-
|
|
52
|
-
if expected_type == "array" && @output.is_a?(Array) && @json_schema[:items]
|
|
53
|
-
validate_array_items(errors, @output, @json_schema[:items], "")
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
errors
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
def validate_array_items(errors, array, items_schema, prefix)
|
|
60
|
-
array.each_with_index do |item, i|
|
|
61
|
-
item_prefix = "#{prefix}[#{i}]"
|
|
62
|
-
validate_value(errors, item, items_schema, item_prefix)
|
|
63
|
-
end
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
def validate_value(errors, value, schema, prefix)
|
|
67
|
-
value_type = schema[:type]&.to_s
|
|
68
|
-
|
|
69
|
-
validate_type_match(errors, value, value_type, prefix) if value_type
|
|
70
|
-
validate_constraints(errors, value, schema, prefix)
|
|
71
|
-
|
|
72
|
-
if value.is_a?(Hash) && (schema.key?(:properties) || value_type == "object")
|
|
73
|
-
validate_object(value, schema, prefix: prefix)
|
|
74
|
-
errors.concat(@errors)
|
|
75
|
-
@errors = []
|
|
76
|
-
elsif value.is_a?(Array) && schema[:items]
|
|
77
|
-
validate_array_items(errors, value, schema[:items], prefix)
|
|
78
|
-
end
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
def validate_type_match(errors, value, expected_type, prefix)
|
|
82
|
-
valid = case expected_type
|
|
83
|
-
when "string" then value.is_a?(String)
|
|
84
|
-
when "integer" then value.is_a?(Integer)
|
|
85
|
-
when "number" then value.is_a?(Numeric)
|
|
86
|
-
when "boolean" then value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
|
87
|
-
when "array" then value.is_a?(Array)
|
|
88
|
-
else true
|
|
89
|
-
end
|
|
90
|
-
errors << "#{prefix}: expected #{expected_type}, got #{value.class}" unless valid
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
def validate_constraints(errors, value, schema, prefix)
|
|
94
|
-
if schema[:minimum] && value.is_a?(Numeric) && value < schema[:minimum]
|
|
95
|
-
errors << "#{prefix}: #{value} is less than minimum #{schema[:minimum]}"
|
|
96
|
-
end
|
|
97
|
-
if schema[:maximum] && value.is_a?(Numeric) && value > schema[:maximum]
|
|
98
|
-
errors << "#{prefix}: #{value} is greater than maximum #{schema[:maximum]}"
|
|
99
|
-
end
|
|
100
|
-
if schema[:enum] && !schema[:enum].include?(value)
|
|
101
|
-
errors << "#{prefix}: #{value.inspect} is not in enum #{schema[:enum].inspect}"
|
|
102
|
-
end
|
|
103
|
-
if schema[:minItems] && value.is_a?(Array) && value.length < schema[:minItems]
|
|
104
|
-
errors << "#{prefix}: array has #{value.length} items, minimum #{schema[:minItems]}"
|
|
105
|
-
end
|
|
106
|
-
if schema[:maxItems] && value.is_a?(Array) && value.length > schema[:maxItems]
|
|
107
|
-
errors << "#{prefix}: array has #{value.length} items, maximum #{schema[:maxItems]}"
|
|
108
|
-
end
|
|
109
|
-
if schema[:minLength] && value.is_a?(String) && value.length < schema[:minLength]
|
|
110
|
-
errors << "#{prefix}: string length #{value.length} is less than minLength #{schema[:minLength]}"
|
|
111
|
-
end
|
|
112
|
-
if schema[:maxLength] && value.is_a?(String) && value.length > schema[:maxLength]
|
|
113
|
-
errors << "#{prefix}: string length #{value.length} is greater than maxLength #{schema[:maxLength]}"
|
|
114
|
-
end
|
|
115
|
-
end
|
|
116
|
-
|
|
117
|
-
def extract_schema(schema)
|
|
118
|
-
instance = schema.is_a?(Class) ? schema.new : schema
|
|
119
|
-
json = if instance.respond_to?(:to_json_schema)
|
|
120
|
-
schema_data = instance.to_json_schema
|
|
121
|
-
schema_data[:schema] || schema_data["schema"] || schema_data
|
|
122
|
-
else
|
|
123
|
-
schema
|
|
124
|
-
end
|
|
125
|
-
deep_symbolize(json)
|
|
126
|
-
end
|
|
127
|
-
|
|
128
|
-
def validate_object(output, schema, prefix:)
|
|
129
|
-
return unless output.is_a?(Hash) && schema.is_a?(Hash)
|
|
130
|
-
|
|
131
|
-
properties = schema[:properties] || {}
|
|
132
|
-
required = schema[:required] || []
|
|
133
|
-
|
|
134
|
-
check_required(required, output, prefix: prefix)
|
|
135
|
-
check_properties(properties, output, prefix: prefix, required_fields: required)
|
|
136
|
-
check_additional_properties(output, schema, prefix: prefix)
|
|
137
|
-
end
|
|
138
|
-
|
|
139
|
-
def check_required(required, output, prefix:)
|
|
140
|
-
required.each do |field|
|
|
141
|
-
key = field.to_s.to_sym
|
|
142
|
-
qualified = qualify(prefix, field)
|
|
143
|
-
@errors << "missing required field: #{qualified}" unless output.key?(key)
|
|
144
|
-
end
|
|
145
|
-
end
|
|
146
|
-
|
|
147
|
-
def check_properties(properties, output, prefix:, required_fields: [])
|
|
148
|
-
required_syms = required_fields.map { |field| field.to_s.to_sym }
|
|
149
|
-
|
|
150
|
-
properties.each do |field, constraints|
|
|
151
|
-
key = field.to_sym
|
|
152
|
-
value = output[key]
|
|
153
|
-
qualified = qualify(prefix, field)
|
|
154
|
-
|
|
155
|
-
if value.nil?
|
|
156
|
-
check_nil_required(qualified, key, constraints, required_syms, output)
|
|
157
|
-
next
|
|
158
|
-
end
|
|
45
|
+
return [] unless @root_node.schema.is_a?(Hash)
|
|
159
46
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
return unless required_syms.include?(key) && output.key?(key)
|
|
166
|
-
|
|
167
|
-
expected = constraints[:type] || "non-null"
|
|
168
|
-
@errors << "#{qualified}: expected #{expected}, got nil"
|
|
169
|
-
end
|
|
170
|
-
|
|
171
|
-
def check_additional_properties(output, schema, prefix:)
|
|
172
|
-
return unless schema[:additionalProperties] == false
|
|
173
|
-
|
|
174
|
-
allowed_keys = (schema[:properties] || {}).keys.map { |prop_key| prop_key.to_s.to_sym }
|
|
175
|
-
extra_keys = output.keys - allowed_keys
|
|
176
|
-
|
|
177
|
-
extra_keys.each do |extra_key|
|
|
178
|
-
@errors << "#{qualify(prefix, extra_key)}: additional property not allowed"
|
|
179
|
-
end
|
|
180
|
-
end
|
|
181
|
-
|
|
182
|
-
def validate_field(field_check)
|
|
183
|
-
check_enum(field_check)
|
|
184
|
-
check_number_range(field_check)
|
|
185
|
-
check_type_constraint(field_check)
|
|
186
|
-
check_string_length(field_check)
|
|
187
|
-
check_nested(field_check)
|
|
188
|
-
end
|
|
189
|
-
|
|
190
|
-
def check_enum(field_check)
|
|
191
|
-
qualified, value, constraints = field_check.to_a
|
|
192
|
-
enum = constraints[:enum]
|
|
193
|
-
return unless enum
|
|
194
|
-
|
|
195
|
-
@errors << "#{qualified}: #{value.inspect} is not in enum #{enum.inspect}" unless enum.include?(value)
|
|
196
|
-
end
|
|
197
|
-
|
|
198
|
-
def check_number_range(field_check)
|
|
199
|
-
qualified, value, constraints = field_check.to_a
|
|
200
|
-
return unless value.is_a?(Numeric)
|
|
201
|
-
|
|
202
|
-
check_minimum(qualified, value, constraints[:minimum])
|
|
203
|
-
check_maximum(qualified, value, constraints[:maximum])
|
|
204
|
-
end
|
|
205
|
-
|
|
206
|
-
def check_type_constraint(field_check)
|
|
207
|
-
qualified, value, constraints = field_check.to_a
|
|
208
|
-
expected_type = constraints[:type]&.to_s
|
|
209
|
-
return unless expected_type
|
|
210
|
-
|
|
211
|
-
@errors << "#{qualified}: expected #{expected_type}, got #{value.class}" unless type_valid?(expected_type,
|
|
212
|
-
value)
|
|
213
|
-
end
|
|
214
|
-
|
|
215
|
-
def type_valid?(expected_type, value)
|
|
216
|
-
case expected_type
|
|
217
|
-
when "string" then value.is_a?(String)
|
|
218
|
-
when "number" then value.is_a?(Numeric)
|
|
219
|
-
when "integer" then value.is_a?(Integer)
|
|
220
|
-
when "boolean" then [true, false].include?(value)
|
|
221
|
-
when "array" then value.is_a?(Array)
|
|
222
|
-
when "object" then value.is_a?(Hash)
|
|
223
|
-
else true
|
|
224
|
-
end
|
|
225
|
-
end
|
|
226
|
-
|
|
227
|
-
def check_nested(field_check)
|
|
228
|
-
qualified, value, constraints = field_check.to_a
|
|
229
|
-
nested_type = constraints[:type]&.to_s
|
|
230
|
-
|
|
231
|
-
case nested_type
|
|
232
|
-
when "object"
|
|
233
|
-
validate_object(value, constraints, prefix: qualified) if value.is_a?(Hash)
|
|
234
|
-
when "array"
|
|
235
|
-
check_array_items(qualified, value, constraints) if value.is_a?(Array)
|
|
47
|
+
if @root_node.object_schema? && !@root_node.hash?
|
|
48
|
+
["expected object, got #{@root_node.value.class}"]
|
|
49
|
+
else
|
|
50
|
+
validate_root
|
|
51
|
+
@errors
|
|
236
52
|
end
|
|
237
53
|
end
|
|
238
54
|
|
|
239
|
-
|
|
240
|
-
qualified, value, constraints = field_check.to_a
|
|
241
|
-
check_size_bounds(qualified, value.length, constraints, :string) if value.is_a?(String)
|
|
242
|
-
end
|
|
55
|
+
private
|
|
243
56
|
|
|
244
|
-
def
|
|
245
|
-
|
|
57
|
+
# Starts the recursive walk at the root node built in initialize.
def validate_root = validate_node(@root_node)
|
|
247
60
|
|
|
248
|
-
def
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
61
|
+
# Validates one node: scalar rules always run; object rules run only for a
# Hash value under an object schema, recursing into each child they yield;
# array values are then handed to validate_array.
def validate_node(node)
  @scalar_rules.validate(node)

  if node.hash? && node.object_schema?
    @object_rules.validate(node) { |nested| validate_node(nested) }
  end

  validate_array(node) if node.array?
end
|
|
253
66
|
|
|
254
|
-
def
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
items_schema = constraints[:items]
|
|
67
|
+
def validate_array(node)
|
|
68
|
+
items_schema = node.items_schema
|
|
258
69
|
return unless items_schema.is_a?(Hash)
|
|
259
70
|
|
|
260
|
-
value.each_with_index do |item,
|
|
261
|
-
|
|
71
|
+
node.value.each_with_index do |item, index|
|
|
72
|
+
validate_node(node.array_item(index, item, items_schema))
|
|
262
73
|
end
|
|
263
74
|
end
|
|
264
75
|
|
|
265
|
-
def
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
if item_type == "object" && item.is_a?(Hash)
|
|
269
|
-
validate_object(item, items_schema, prefix: item_key)
|
|
270
|
-
elsif item_type == "array" && item.is_a?(Array)
|
|
271
|
-
check_array_items(item_key, item, items_schema)
|
|
272
|
-
else
|
|
273
|
-
validate_field(FieldCheck.new(qualified: item_key, value: item, constraints: items_schema))
|
|
274
|
-
end
|
|
275
|
-
end
|
|
276
|
-
|
|
277
|
-
def check_minimum(qualified, actual, limit)
|
|
278
|
-
return unless limit && actual < limit
|
|
279
|
-
|
|
280
|
-
@errors << "#{qualified}: #{actual} is below minimum #{limit}"
|
|
281
|
-
end
|
|
282
|
-
|
|
283
|
-
def check_maximum(qualified, actual, limit)
|
|
284
|
-
return unless limit && actual > limit
|
|
285
|
-
|
|
286
|
-
@errors << "#{qualified}: #{actual} is above maximum #{limit}"
|
|
287
|
-
end
|
|
288
|
-
|
|
289
|
-
def check_size_minimum(qualified, actual, limit, bounds)
|
|
290
|
-
return unless limit && actual < limit
|
|
291
|
-
|
|
292
|
-
@errors << "#{qualified}: #{bounds[:metric]} #{actual} is below #{bounds[:min_key]} #{limit}"
|
|
293
|
-
end
|
|
294
|
-
|
|
295
|
-
def check_size_maximum(qualified, actual, limit, bounds)
|
|
296
|
-
return unless limit && actual > limit
|
|
297
|
-
|
|
298
|
-
@errors << "#{qualified}: #{bounds[:metric]} #{actual} is above #{bounds[:max_key]} #{limit}"
|
|
299
|
-
end
|
|
300
|
-
|
|
301
|
-
def qualify(prefix, field)
|
|
302
|
-
prefix ? "#{prefix}.#{field}" : field.to_s
|
|
76
|
+
# Reports whether the extracted root schema describes an object: either its
# :type is "object" or it has a :properties key.
#
# initialize calls this before validate gets the chance to apply its
# "schema must be a Hash" guard, so anything that is not a Hash (nil, a class,
# a string, ...) is answered with false here instead of raising.
#
# @param schema [Object] the extracted schema, expected to be a Hash
# @return [Boolean]
def root_object_schema?(schema)
  return false unless schema.is_a?(Hash)

  schema[:type]&.to_s == "object" || schema.key?(:properties)
end
|
|
304
79
|
end
|
|
305
80
|
end
|
|
@@ -19,6 +19,15 @@ module RubyLLM
|
|
|
19
19
|
input: input, schema: schema)
|
|
20
20
|
end
|
|
21
21
|
|
|
22
|
+
# Calls every observer with the parsed output and returns one Hash per
# observer: its description and a strict true/false :passed flag. An observer
# that raises a StandardError is reported as not passed, with an :error entry
# of the form "<ErrorClass>: <message>".
def self.run_observations(observers, parsed_output, input: nil)
  observers.map do |observer|
    begin
      outcome = observer.call(parsed_output, input: input)
      { description: observer.description, passed: outcome ? true : false }
    rescue StandardError => failure
      { description: observer.description, passed: false, error: "#{failure.class}: #{failure.message}" }
    end
  end
end
|
|
30
|
+
|
|
22
31
|
private
|
|
23
32
|
|
|
24
33
|
def parse_error?(parsed_output)
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Eval
|
|
6
|
+
# Runs a single eval case against a step by chaining the collaborators it is
# given: run the step, normalize the run result, score it, build a case
# result, then apply step expectations.
class CaseExecutor
  def initialize(step:, scorer: nil, normalizer: StepResultNormalizer.new,
                 result_builder: CaseResultBuilder.new,
                 step_expectation_applier: StepExpectationApplier.new)
    @step = step
    @normalizer = normalizer
    @result_builder = result_builder
    @step_expectation_applier = step_expectation_applier
    # Without an explicit scorer, one is built for this step.
    @scorer = scorer || CaseScorer.new(step: step)
  end

  # Returns whatever the expectation applier returns. A contract error whose
  # message mentions a missing adapter becomes a "SKIP" case result; any other
  # contract error is re-raised.
  def call(test_case:, context:)
    execute(test_case, context)
  rescue RubyLLM::Contract::Error => failure
    raise unless missing_adapter?(failure)

    skipped_result(test_case, failure.message)
  end

  private

  # The full pipeline for one case, with no error handling.
  def execute(test_case, context)
    raw = @step.run(test_case.input, context: context)
    normalized = @normalizer.call(raw)
    verdict = @scorer.call(test_case: test_case, step_result: normalized)
    built = @result_builder.call(test_case: test_case, step_result: normalized, evaluation: verdict)

    @step_expectation_applier.call(result: built, run_result: raw, test_case: test_case)
  end

  # Detection is by message text.
  def missing_adapter?(error)
    error.message.include?("No adapter configured")
  end

  # Case result recorded when the step could not be run at all.
  def skipped_result(test_case, reason)
    attributes = {
      name: test_case.name,
      input: test_case.input,
      output: nil,
      expected: test_case.expected,
      step_status: :skipped,
      score: 0.0,
      passed: false,
      label: "SKIP",
      details: "skipped: #{reason}"
    }

    CaseResult.new(**attributes)
  end
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Eval
|
|
6
|
+
# Assembles a CaseResult from a test case, the normalized step result and the
# evaluation produced for it.
class CaseResultBuilder
  # duration_ms and cost are read from the step result's trace when it has one;
  # both are nil otherwise.
  def call(test_case:, step_result:, evaluation:)
    trace = (step_result.trace if step_result.respond_to?(:trace))

    attributes = {
      name: test_case.name,
      input: test_case.input,
      output: step_result.parsed_output,
      expected: test_case.expected,
      step_status: step_result.status,
      score: evaluation.score,
      passed: evaluation.passed,
      label: evaluation.label,
      details: evaluation.details,
      duration_ms: trace_metric(trace, :total_latency_ms, :latency_ms),
      cost: trace_metric(trace, :total_cost, :cost)
    }

    CaseResult.new(**attributes)
  end

  private

  # Prefers the reader named +pipeline_key+ when the trace responds to it and
  # otherwise indexes the trace with +step_key+.
  def trace_metric(trace, pipeline_key, step_key)
    return unless trace
    return trace.public_send(pipeline_key) if trace.respond_to?(pipeline_key)

    trace[step_key]
  end
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|