ruby_llm-contract 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +55 -0
- data/CHANGELOG.md +76 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +176 -0
- data/LICENSE +21 -0
- data/README.md +154 -0
- data/Rakefile +8 -0
- data/examples/00_basics.rb +500 -0
- data/examples/01_classify_threads.rb +220 -0
- data/examples/02_generate_comment.rb +203 -0
- data/examples/03_target_audience.rb +201 -0
- data/examples/04_real_llm.rb +410 -0
- data/examples/05_output_schema.rb +258 -0
- data/examples/07_keyword_extraction.rb +239 -0
- data/examples/08_translation.rb +353 -0
- data/examples/09_eval_dataset.rb +287 -0
- data/examples/10_reddit_full_showcase.rb +363 -0
- data/examples/README.md +140 -0
- data/lib/ruby_llm/contract/adapters/base.rb +13 -0
- data/lib/ruby_llm/contract/adapters/response.rb +17 -0
- data/lib/ruby_llm/contract/adapters/ruby_llm.rb +94 -0
- data/lib/ruby_llm/contract/adapters/test.rb +44 -0
- data/lib/ruby_llm/contract/adapters.rb +6 -0
- data/lib/ruby_llm/contract/concerns/deep_symbolize.rb +17 -0
- data/lib/ruby_llm/contract/concerns/eval_host.rb +109 -0
- data/lib/ruby_llm/contract/concerns/trace_equality.rb +15 -0
- data/lib/ruby_llm/contract/concerns/usage_aggregator.rb +43 -0
- data/lib/ruby_llm/contract/configuration.rb +21 -0
- data/lib/ruby_llm/contract/contract/definition.rb +39 -0
- data/lib/ruby_llm/contract/contract/invariant.rb +23 -0
- data/lib/ruby_llm/contract/contract/parser.rb +143 -0
- data/lib/ruby_llm/contract/contract/schema_validator.rb +239 -0
- data/lib/ruby_llm/contract/contract/validator.rb +104 -0
- data/lib/ruby_llm/contract/contract.rb +7 -0
- data/lib/ruby_llm/contract/cost_calculator.rb +38 -0
- data/lib/ruby_llm/contract/dsl.rb +13 -0
- data/lib/ruby_llm/contract/errors.rb +19 -0
- data/lib/ruby_llm/contract/eval/case_result.rb +76 -0
- data/lib/ruby_llm/contract/eval/contract_detail_builder.rb +47 -0
- data/lib/ruby_llm/contract/eval/dataset.rb +53 -0
- data/lib/ruby_llm/contract/eval/eval_definition.rb +112 -0
- data/lib/ruby_llm/contract/eval/evaluation_result.rb +27 -0
- data/lib/ruby_llm/contract/eval/evaluator/exact.rb +20 -0
- data/lib/ruby_llm/contract/eval/evaluator/json_includes.rb +58 -0
- data/lib/ruby_llm/contract/eval/evaluator/proc_evaluator.rb +40 -0
- data/lib/ruby_llm/contract/eval/evaluator/regex.rb +27 -0
- data/lib/ruby_llm/contract/eval/model_comparison.rb +80 -0
- data/lib/ruby_llm/contract/eval/pipeline_result_adapter.rb +15 -0
- data/lib/ruby_llm/contract/eval/report.rb +115 -0
- data/lib/ruby_llm/contract/eval/runner.rb +162 -0
- data/lib/ruby_llm/contract/eval/trait_evaluator.rb +75 -0
- data/lib/ruby_llm/contract/eval.rb +16 -0
- data/lib/ruby_llm/contract/pipeline/base.rb +62 -0
- data/lib/ruby_llm/contract/pipeline/result.rb +131 -0
- data/lib/ruby_llm/contract/pipeline/runner.rb +139 -0
- data/lib/ruby_llm/contract/pipeline/trace.rb +72 -0
- data/lib/ruby_llm/contract/pipeline.rb +6 -0
- data/lib/ruby_llm/contract/prompt/ast.rb +38 -0
- data/lib/ruby_llm/contract/prompt/builder.rb +47 -0
- data/lib/ruby_llm/contract/prompt/node.rb +25 -0
- data/lib/ruby_llm/contract/prompt/nodes/example_node.rb +27 -0
- data/lib/ruby_llm/contract/prompt/nodes/rule_node.rb +15 -0
- data/lib/ruby_llm/contract/prompt/nodes/section_node.rb +26 -0
- data/lib/ruby_llm/contract/prompt/nodes/system_node.rb +15 -0
- data/lib/ruby_llm/contract/prompt/nodes/user_node.rb +15 -0
- data/lib/ruby_llm/contract/prompt/nodes.rb +7 -0
- data/lib/ruby_llm/contract/prompt/renderer.rb +76 -0
- data/lib/ruby_llm/contract/railtie.rb +20 -0
- data/lib/ruby_llm/contract/rake_task.rb +78 -0
- data/lib/ruby_llm/contract/rspec/pass_eval.rb +96 -0
- data/lib/ruby_llm/contract/rspec/satisfy_contract.rb +31 -0
- data/lib/ruby_llm/contract/rspec.rb +6 -0
- data/lib/ruby_llm/contract/step/base.rb +138 -0
- data/lib/ruby_llm/contract/step/dsl.rb +144 -0
- data/lib/ruby_llm/contract/step/limit_checker.rb +64 -0
- data/lib/ruby_llm/contract/step/result.rb +38 -0
- data/lib/ruby_llm/contract/step/retry_executor.rb +90 -0
- data/lib/ruby_llm/contract/step/retry_policy.rb +76 -0
- data/lib/ruby_llm/contract/step/runner.rb +126 -0
- data/lib/ruby_llm/contract/step/trace.rb +70 -0
- data/lib/ruby_llm/contract/step.rb +10 -0
- data/lib/ruby_llm/contract/token_estimator.rb +19 -0
- data/lib/ruby_llm/contract/types.rb +11 -0
- data/lib/ruby_llm/contract/version.rb +7 -0
- data/lib/ruby_llm/contract.rb +108 -0
- data/ruby_llm-contract.gemspec +33 -0
- metadata +172 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module RubyLLM
|
|
6
|
+
module Contract
|
|
7
|
+
# Turns raw LLM output into Ruby data according to a parse strategy.
#
# The +:json+ strategy tolerates common output quirks: a leading UTF-8 BOM,
# a surrounding markdown code fence, and JSON embedded in prose.
class Parser
  extend Concerns::DeepSymbolize

  # UTF-8 BOM (Byte Order Mark) that some LLMs/APIs prepend to output.
  UTF8_BOM = "\xEF\xBB\xBF"

  # Matches text that is entirely one markdown code fence (```json ... ``` or
  # ``` ... ```, optional surrounding whitespace) and captures the fenced body.
  CODE_FENCE_PATTERN = /\A\s*```(?:json|JSON)?\s*\n(.*?)\n\s*```\s*\z/m

  # Matches a character that can open a JSON object or array.
  JSON_START_PATTERN = /[{\[]/

  # Public wrapper around +deep_symbolize+ (provided by Concerns::DeepSymbolize;
  # presumably converts nested keys to symbols — confirm in that concern).
  def self.symbolize_keys(obj)
    deep_symbolize(obj)
  end

  # Parses +raw_output+ using the given strategy.
  #
  # @param raw_output [Object] raw adapter output
  # @param strategy [Symbol] +:json+ or +:text+
  # @return [Object] parsed JSON data for +:json+, +raw_output+ itself for +:text+
  # @raise [ArgumentError] for an unknown strategy
  # @raise [RubyLLM::Contract::ParseError] when +:json+ parsing fails
  def self.parse(raw_output, strategy:)
    case strategy
    when :json then parse_json(raw_output)
    when :text then raw_output
    else raise ArgumentError, "Unknown parse strategy: #{strategy}"
    end
  end

  # Parses +raw_output+ as JSON.
  #
  # Hashes and Arrays are treated as already parsed and only passed through
  # +deep_symbolize+. Anything else is converted to a String, stripped of a BOM
  # and a wrapping code fence, and parsed.
  #
  # @raise [RubyLLM::Contract::ParseError] when the content is nil or not JSON
  def self.parse_json(raw_output)
    return deep_symbolize(raw_output) if raw_output.is_a?(Hash) || raw_output.is_a?(Array)

    # Coerce non-String scalars (boolean, numeric) to their JSON representation
    # to prevent TypeError from JSON.parse on non-string input.
    coerced = raw_output.is_a?(String) ? raw_output : raw_output&.to_s
    text = strip_code_fences(strip_bom(coerced))
    raise RubyLLM::Contract::ParseError.new("Failed to parse JSON: nil content", details: raw_output) if text.nil?

    parse_json_text(text, raw_output)
  end

  # Parses +text+ directly; falls back to extracting JSON from prose when the
  # text as a whole is not valid JSON.
  def self.parse_json_text(text, raw_output)
    JSON.parse(text, symbolize_names: true)
  rescue JSON::ParserError
    parse_json_with_extraction(text, raw_output)
  end
  private_class_method :parse_json_text

  # Fallback: extract a JSON object or array embedded in prose.
  #
  # Balanced candidates are tried in order of appearance and the first one that
  # parses wins, so a bracketed aside in the prose ("See [note]: {...}") no
  # longer hides the JSON that follows it. When nothing parses, the error keeps
  # the message of the first failed candidate.
  #
  # @raise [RubyLLM::Contract::ParseError] when no candidate parses
  def self.parse_json_with_extraction(text, raw_output)
    first_error = nil

    each_json_candidate(text) do |candidate|
      return JSON.parse(candidate, symbolize_names: true)
    rescue JSON::ParserError => e
      first_error ||= e
    end

    reason = first_error ? first_error.message : "no valid JSON found in output"
    raise RubyLLM::Contract::ParseError.new("Failed to parse JSON: #{reason}", details: raw_output)
  end
  private_class_method :parse_json_with_extraction

  # Yields each balanced { } or [ ] block of +text+, left to right.
  #
  # The search resumes only AFTER a yielded block and stops at the first
  # unbalanced opener, so a fragment nested inside malformed or truncated JSON
  # is never offered as a candidate.
  def self.each_json_candidate(text)
    return unless text.is_a?(String)

    offset = 0
    while (start_idx = text.index(JSON_START_PATTERN, offset))
      candidate = scan_for_balanced_json(text, start_idx)
      break unless candidate

      yield candidate
      offset = start_idx + candidate.length
    end
  end
  private_class_method :each_json_candidate

  # Removes a leading UTF-8 BOM; non-String input is returned untouched.
  def self.strip_bom(text)
    return text unless text.is_a?(String)

    text.delete_prefix(UTF8_BOM)
  end

  # Returns the content of a wrapping markdown code fence, or +text+ unchanged
  # when it is not a String or not fenced.
  def self.strip_code_fences(text)
    return text unless text.is_a?(String)

    match = text.match(CODE_FENCE_PATTERN)
    match ? match[1] : text
  end

  # Extracts the first JSON object or array from text that may contain prose.
  # Uses bracket-matching to find the outermost balanced { } or [ ] block.
  #
  # @return [String, nil] the balanced block, or nil when there is none
  def self.extract_json(text)
    return nil unless text.is_a?(String)

    start_idx = text.index(JSON_START_PATTERN)
    return nil unless start_idx

    scan_for_balanced_json(text, start_idx)
  end

  # Walks +text+ from +start_idx+ and returns the substring up to the bracket
  # that balances the opening one, or nil when it never closes. Only brackets
  # of the opening kind are counted.
  def self.scan_for_balanced_json(text, start_idx)
    opening = text[start_idx]
    closing = opening == "{" ? "}" : "]"
    state = { depth: 0, in_string: false, escape_next: false }

    (start_idx...text.length).each do |pos|
      result = process_json_char(text[pos], opening, closing, state)
      return text[start_idx..pos] if result == :matched
    end

    nil
  end
  private_class_method :scan_for_balanced_json

  # Feeds one character into the scanner state.
  #
  # @return [Symbol, nil] +:matched+ when this character closes the block
  def self.process_json_char(char, opening, closing, state)
    # The character after a backslash inside a string is consumed verbatim.
    if state[:escape_next]
      state[:escape_next] = false
      return nil
    end

    return handle_backslash(state) if char == "\\"
    return handle_quote(state) if char == '"'
    return nil if state[:in_string] # brackets inside strings do not count

    handle_bracket(char, opening, closing, state)
  end
  private_class_method :process_json_char

  # A backslash starts an escape only while inside a string.
  def self.handle_backslash(state)
    state[:escape_next] = true if state[:in_string]
    nil
  end
  private_class_method :handle_backslash

  # An unescaped double quote toggles the in-string flag.
  def self.handle_quote(state)
    state[:in_string] = !state[:in_string]
    nil
  end
  private_class_method :handle_quote

  # Adjusts nesting depth for an opening or closing bracket of the tracked kind.
  def self.handle_bracket(char, opening, closing, state)
    return adjust_depth(state, 1) if char == opening
    return adjust_depth(state, -1) if char == closing

    nil
  end
  private_class_method :handle_bracket

  # Applies +delta+ to the depth; reports +:matched+ when it returns to zero.
  def self.adjust_depth(state, delta)
    state[:depth] += delta
    state[:depth].zero? ? :matched : nil
  end
  private_class_method :adjust_depth
end
|
|
142
|
+
end
|
|
143
|
+
end
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
# Client-side validation of parsed output against an output_schema.
|
|
6
|
+
# Checks required fields, enum constraints, number ranges, and nested objects.
|
|
7
|
+
# This complements provider-side enforcement (with_schema) and catches
|
|
8
|
+
# violations when using Test adapter or providers that ignore schemas.
|
|
9
|
+
class SchemaValidator # rubocop:disable Metrics/ClassLength
  include Concerns::DeepSymbolize

  # Bundles field path, value, and constraints to reduce parameter passing.
  # Built positionally: keyword construction of a Struct declared without
  # keyword_init only works on Ruby >= 3.2 (older versions store the whole
  # hash in the first member and leave the rest nil).
  FieldCheck = Struct.new(:qualified, :value, :constraints)

  # Schema keywords and message wording for size checks, per value kind.
  SIZE_BOUNDS = {
    string: { min_key: :minLength, max_key: :maxLength, metric: "length" },
    array: { min_key: :minItems, max_key: :maxItems, metric: "array length" }
  }.freeze

  # Convenience entry point.
  #
  # @param parsed_output [Object] parsed step output (symbol keys expected)
  # @param schema [Object] schema class, schema instance, or plain Hash
  # @return [Array<String>] error messages; empty when the output conforms
  def self.validate(parsed_output, schema)
    new(parsed_output, schema).validate
  end

  def initialize(parsed_output, schema)
    @output = parsed_output
    @json_schema = extract_schema(schema)
    @errors = []
  end

  # Runs the validation.
  #
  # @return [Array<String>] accumulated error messages
  def validate
    return [] unless @json_schema.is_a?(Hash)
    return validate_non_hash_output unless @output.is_a?(Hash)

    validate_object(@output, @json_schema, prefix: nil)
    @errors
  end

  private

  # Non-Hash output is only an error when the schema describes an object.
  def validate_non_hash_output
    expected_type = @json_schema[:type]&.to_s
    if expected_type == "object" || @json_schema.key?(:properties)
      ["expected object, got #{@output.class}"]
    else
      []
    end
  end

  # Normalizes the schema argument to a Hash passed through +deep_symbolize+.
  # Classes are instantiated; objects responding to +to_json_schema+ are
  # unwrapped from an optional :schema / "schema" envelope.
  def extract_schema(schema)
    instance = schema.is_a?(Class) ? schema.new : schema
    json = if instance.respond_to?(:to_json_schema)
             schema_data = instance.to_json_schema
             schema_data[:schema] || schema_data["schema"] || schema_data
           else
             schema
           end
    deep_symbolize(json)
  end

  # Validates one object level: required keys, per-property constraints and
  # additionalProperties. +prefix+ is the dotted path of the enclosing field.
  def validate_object(output, schema, prefix:)
    return unless output.is_a?(Hash) && schema.is_a?(Hash)

    properties = schema[:properties] || {}
    required = schema[:required] || []

    check_required(required, output, prefix: prefix)
    check_properties(properties, output, prefix: prefix, required_fields: required)
    check_additional_properties(output, schema, prefix: prefix)
  end

  # Reports every required key that is absent from +output+.
  def check_required(required, output, prefix:)
    required.each do |field|
      key = field.to_s.to_sym
      qualified = qualify(prefix, field)
      @errors << "missing required field: #{qualified}" unless output.key?(key)
    end
  end

  # Validates each declared property that is present with a non-nil value;
  # nil values are only reported when the field is required.
  def check_properties(properties, output, prefix:, required_fields: [])
    required_syms = required_fields.map { |field| field.to_s.to_sym }

    properties.each do |field, constraints|
      key = field.to_s.to_sym # same normalization as check_required
      value = output[key]
      qualified = qualify(prefix, field)

      if value.nil?
        check_nil_required(qualified, key, constraints, required_syms, output)
        next
      end

      validate_field(FieldCheck.new(qualified, value, constraints))
    end
  end

  # A required key that is present but nil is a type error (an absent key is
  # already reported by check_required).
  def check_nil_required(qualified, key, constraints, required_syms, output)
    return unless required_syms.include?(key) && output.key?(key)

    declared_type = constraints[:type] if constraints.is_a?(Hash)
    expected = declared_type || "non-null"
    @errors << "#{qualified}: expected #{expected}, got nil"
  end

  # Reports keys not declared in :properties when additionalProperties is false.
  def check_additional_properties(output, schema, prefix:)
    return unless schema[:additionalProperties] == false

    allowed_keys = (schema[:properties] || {}).keys.map { |prop_key| prop_key.to_s.to_sym }
    extra_keys = output.keys - allowed_keys

    extra_keys.each do |extra_key|
      @errors << "#{qualify(prefix, extra_key)}: additional property not allowed"
    end
  end

  # Runs every per-field check. Property schemas that are not Hashes (JSON
  # Schema permits boolean schemas) carry no constraints this class can read.
  def validate_field(field_check)
    return unless field_check.constraints.is_a?(Hash)

    check_enum(field_check)
    check_number_range(field_check)
    check_type_constraint(field_check)
    check_string_length(field_check)
    check_nested(field_check)
  end

  def check_enum(field_check)
    qualified, value, constraints = field_check.to_a
    enum = constraints[:enum]
    return unless enum

    @errors << "#{qualified}: #{value.inspect} is not in enum #{enum.inspect}" unless enum.include?(value)
  end

  # minimum / maximum apply to numeric values only.
  def check_number_range(field_check)
    qualified, value, constraints = field_check.to_a
    return unless value.is_a?(Numeric)

    check_minimum(qualified, value, constraints[:minimum])
    check_maximum(qualified, value, constraints[:maximum])
  end

  def check_type_constraint(field_check)
    qualified, value, constraints = field_check.to_a
    expected_type = constraints[:type]&.to_s
    return unless expected_type
    return if type_valid?(expected_type, value)

    @errors << "#{qualified}: expected #{expected_type}, got #{value.class}"
  end

  # Unknown type names are accepted rather than rejected.
  def type_valid?(expected_type, value)
    case expected_type
    when "string" then value.is_a?(String)
    when "number" then value.is_a?(Numeric)
    when "integer" then value.is_a?(Integer)
    when "boolean" then [true, false].include?(value)
    when "array" then value.is_a?(Array)
    when "object" then value.is_a?(Hash)
    else true
    end
  end

  # Recurses into nested objects and arrays.
  def check_nested(field_check)
    qualified, value, constraints = field_check.to_a
    nested_type = constraints[:type]&.to_s

    case nested_type
    when "object"
      validate_object(value, constraints, prefix: qualified) if value.is_a?(Hash)
    when "array"
      check_array_items(qualified, value, constraints) if value.is_a?(Array)
    end
  end

  def check_string_length(field_check)
    qualified, value, constraints = field_check.to_a
    check_size_bounds(qualified, value.length, constraints, :string) if value.is_a?(String)
  end

  def check_array_length(qualified, value, constraints)
    check_size_bounds(qualified, value.length, constraints, :array) if value.is_a?(Array)
  end

  # Checks +actual+ against the min/max keywords registered for +kind+.
  def check_size_bounds(qualified, actual, constraints, kind)
    bounds = SIZE_BOUNDS[kind]
    check_size_minimum(qualified, actual, constraints[bounds[:min_key]], bounds)
    check_size_maximum(qualified, actual, constraints[bounds[:max_key]], bounds)
  end

  # Checks the array length, then each element against the :items schema.
  def check_array_items(qualified, value, constraints)
    check_array_length(qualified, value, constraints)

    items_schema = constraints[:items]
    return unless items_schema.is_a?(Hash)

    value.each_with_index do |item, idx|
      validate_array_item("#{qualified}[#{idx}]", item, items_schema)
    end
  end

  # Objects and arrays recurse structurally; everything else (including an
  # element of the wrong kind) goes through the scalar field checks.
  def validate_array_item(item_key, item, items_schema)
    item_type = items_schema[:type]&.to_s

    if item_type == "object" && item.is_a?(Hash)
      validate_object(item, items_schema, prefix: item_key)
    elsif item_type == "array" && item.is_a?(Array)
      check_array_items(item_key, item, items_schema)
    else
      validate_field(FieldCheck.new(item_key, item, items_schema))
    end
  end

  def check_minimum(qualified, actual, limit)
    return unless limit && actual < limit

    @errors << "#{qualified}: #{actual} is below minimum #{limit}"
  end

  def check_maximum(qualified, actual, limit)
    return unless limit && actual > limit

    @errors << "#{qualified}: #{actual} is above maximum #{limit}"
  end

  def check_size_minimum(qualified, actual, limit, bounds)
    return unless limit && actual < limit

    @errors << "#{qualified}: #{bounds[:metric]} #{actual} is below #{bounds[:min_key]} #{limit}"
  end

  def check_size_maximum(qualified, actual, limit, bounds)
    return unless limit && actual > limit

    @errors << "#{qualified}: #{bounds[:metric]} #{actual} is above #{bounds[:max_key]} #{limit}"
  end

  # Builds the dotted path used in error messages.
  def qualify(prefix, field)
    prefix ? "#{prefix}.#{field}" : field.to_s
  end
end
|
|
238
|
+
end
|
|
239
|
+
end
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
# Parses raw step output and checks it against the output type, the optional
# output schema, and the contract's invariants.
class Validator
  # Class-level shortcut for +new.validate(...)+.
  def self.validate(raw_output:, definition:, output_type:, input: nil, schema: nil)
    new.validate(raw_output: raw_output, definition: definition, output_type: output_type,
                 input: input, schema: schema)
  end

  # @param raw_output [Object] raw adapter output
  # @param definition [#parse_strategy, #invariants] contract definition
  # @param output_type [Object] a Class, or an object coercing via +[]+
  # @param input [Object, nil] original step input, handed to invariants
  # @param schema [Object, nil] output schema; when given, forces JSON parsing
  #   and replaces the output-type check
  # @return [Hash] +:parsed_output+, +:errors+ and +:status+
  #   (+:ok+, +:validation_failed+ or +:parse_error+)
  def validate(raw_output:, definition:, output_type:, input: nil, schema: nil)
    parsing_definition = schema ? with_json_strategy(definition) : definition
    parsed = parse_output(raw_output, parsing_definition)
    return parsed if parse_error?(parsed)

    outcome = coerce_and_freeze(parsed, output_type, schema)
    problems = collect_errors(outcome, schema, definition, input)
    status = problems.empty? ? :ok : :validation_failed

    { parsed_output: outcome[:value], errors: problems, status: status }
  end

  private

  # True when +parsed_output+ is the failure hash built by #parse_output.
  def parse_error?(parsed_output)
    return false unless parsed_output.is_a?(Hash)

    parsed_output[:status] == :parse_error
  end

  # Applies the type check/coercion and deep-freezes the value that will be
  # returned; the uncoerced value is kept when the type check failed.
  def coerce_and_freeze(parsed_output, output_type, schema)
    has_schema = !schema.nil?
    coerced, type_errors = validate_type(parsed_output, output_type, has_schema)
    value = type_errors.empty? ? coerced : parsed_output
    deep_freeze(value)

    { value: value, type_errors: type_errors }
  end

  # Concatenates type, schema and invariant errors, in that order.
  def collect_errors(effective_output, schema, definition, input)
    value = effective_output[:value]
    schema_errors = validate_schema(value, schema)
    invariant_errors = validate_invariants(value, definition, input)

    effective_output[:type_errors] + schema_errors + invariant_errors
  end

  # Returns +[value, errors]+. Skipped entirely when a schema is present.
  # Plain classes (those without +[]+) are checked with +is_a?+; any other
  # output type is asked to coerce through +[]+.
  def validate_type(parsed_output, output_type, has_schema)
    return [parsed_output, []] if has_schema

    plain_class = output_type.is_a?(Class) && !output_type.respond_to?(:[])
    return [output_type[parsed_output], []] unless plain_class
    raise TypeError, "expected #{output_type}, got #{parsed_output.class}" unless parsed_output.is_a?(output_type)

    [parsed_output, []]
  rescue Dry::Types::CoercionError, TypeError, ArgumentError => e
    [parsed_output, [e.message]]
  end

  def validate_schema(parsed_output, schema)
    schema ? SchemaValidator.validate(parsed_output, schema) : []
  end

  # Runs every invariant; a falsy result or a raised StandardError records a
  # failure. A nil result additionally emits a warning.
  def validate_invariants(parsed_output, definition, input)
    failures = []

    definition.invariants.each do |invariant|
      verdict = invariant.call(parsed_output, input: input)
      if verdict.nil?
        warn "[ruby_llm-contract] validate(\"#{invariant.description}\") returned nil. " \
             "This usually means a key mismatch (string vs symbol). " \
             "Output keys are always symbols."
      end
      failures << invariant.description unless verdict
    rescue StandardError => e
      failures << "#{invariant.description} (raised #{e.class}: #{e.message})"
    end

    failures
  end

  # Parses the raw output; a ParseError becomes a +:parse_error+ result hash.
  def parse_output(raw_output, definition)
    Parser.parse(raw_output, strategy: definition.parse_strategy)
  rescue RubyLLM::Contract::ParseError => e
    { parsed_output: nil, errors: [e.message], status: :parse_error }
  end

  # Substitutes a JSON-parsing definition when the given one parses otherwise.
  def with_json_strategy(definition)
    if definition.parse_strategy == :json
      definition
    else
      Definition.new { parse :json }
    end
  end

  # Recursively freezes Hashes (values only), Arrays and Strings in place and
  # returns +obj+; other objects are returned untouched.
  def deep_freeze(obj)
    case obj
    when Hash then obj.each_value { |member| deep_freeze(member) }
    when Array then obj.each { |member| deep_freeze(member) }
    when String then nil
    else return obj
    end

    obj.freeze
  end
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
# Best-effort cost estimation from token usage and the per-million-token
# prices exposed by the RubyLLM model registry.
module CostCalculator
  # @param model_name [String, nil] model identifier looked up via RubyLLM.models
  # @param usage [Hash] reads +:input_tokens+ and +:output_tokens+
  # @return [Float, nil] cost rounded to 6 decimals; nil when the model is
  #   unknown, the arguments are unusable, or any StandardError occurs
  def self.calculate(model_name:, usage:)
    return unless model_name && usage.is_a?(Hash)

    pricing = find_model(model_name)
    pricing ? compute_cost(pricing, usage) : nil
  rescue StandardError
    nil
  end

  # Adds input and output cost and rounds the total.
  def self.compute_cost(model_info, usage)
    prompt_side = token_cost(usage[:input_tokens], model_info.input_price_per_million)
    completion_side = token_cost(usage[:output_tokens], model_info.output_price_per_million)
    total = prompt_side + completion_side
    total.round(6)
  end

  # Cost of +tokens+ at +price_per_million+; a nil operand counts as zero.
  def self.token_cost(tokens, price_per_million)
    count = tokens || 0
    rate = price_per_million || 0
    count * rate / 1_000_000.0
  end

  # Registry lookup that never raises a StandardError.
  def self.find_model(model_name)
    if defined?(RubyLLM)
      RubyLLM.models.find(model_name)
    end
  rescue StandardError
    nil
  end

  private_class_method :compute_cost, :token_cost, :find_model
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
# Include this module to get `Types` constant as a shortcut for RubyLLM::Contract::Types.
|
|
6
|
+
# Usage: `include RubyLLM::Contract::DSL` at the top of your file or class.
|
|
7
|
+
module DSL
  # Hook run when the module is included: exposes RubyLLM::Contract::Types as
  # +Types+ on the including class/module, unless a +Types+ constant is
  # already resolvable from it.
  def self.included(base)
    return if base.const_defined?(:Types)

    base.const_set(:Types, RubyLLM::Contract::Types)
  end
end
|
|
12
|
+
end
|
|
13
|
+
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
# Base class for this library's errors. Carries an optional +details+ payload
# alongside the message.
class Error < StandardError
  attr_reader :details

  # @param message [String, nil] error message
  # @param details [Object, nil] extra context attached to the error
  def initialize(message = nil, details: nil)
    super(message)
    @details = details
  end
end

# Error subclass for input problems.
class InputError < Error
end

# Error subclass for output parsing failures.
class ParseError < Error
end

# Error subclass for contract problems.
class ContractError < Error
end

# Error subclass for adapter problems.
class AdapterError < Error
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Eval
|
|
6
|
+
# Frozen record of a single evaluated case: what was run, what came back,
# how it scored, and whether it passed.
class CaseResult
  attr_reader :name, :input, :output, :expected, :step_status,
              :score, :details, :duration_ms, :cost

  # +score+ is converted with +to_f+ and clamped to 0.0..1.0. The instance is
  # frozen once built (the referenced objects themselves are not).
  def initialize(name:, input:, output:, expected:, step_status:,
                 score:, passed:, label: nil, details: nil, duration_ms: nil, cost: nil)
    @name, @input, @output, @expected = name, input, output, expected
    @step_status = step_status
    @passed = passed
    @score = score.to_f.clamp(0.0, 1.0)
    @label, @details = label, details
    @duration_ms, @cost = duration_ms, cost
    freeze
  end

  def passed?
    @passed
  end

  def failed?
    !@passed
  end

  # Explicit label when one was given, otherwise "PASS" / "FAIL".
  def label
    return @label if @label

    @passed ? "PASS" : "FAIL"
  end

  # For Hash expected/output pairs, maps each expected key whose value does
  # not match to +{ expected:, got: }+. Returns +{}+ for any other pairing.
  def mismatches
    return {} unless @expected.is_a?(Hash) && @output.is_a?(Hash)

    diff = {}
    @expected.each do |key, wanted|
      got = @output[key]
      diff[key] = { expected: wanted, got: got } unless match?(wanted, got)
    end
    diff
  end

  def to_h
    {
      name: @name,
      input: @input,
      output: @output,
      expected: @expected,
      step_status: @step_status,
      score: @score,
      passed: @passed,
      label: label,
      details: @details,
      duration_ms: @duration_ms,
      cost: @cost
    }
  end

  private

  # Regexp expectations are matched against +actual.to_s+; everything else is
  # compared with +==+.
  def match?(expected_value, actual)
    if expected_value.is_a?(::Regexp)
      actual.to_s.match?(expected_value)
    else
      expected_value == actual
    end
  end
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Eval
|
|
6
|
+
# Extracted from Runner to reduce class length.
|
|
7
|
+
# Builds contract detail strings for contract-only evaluation.
|
|
8
|
+
module ContractDetailBuilder
  private

  # Builds the comma-separated summary, always starting with "contract passed".
  # Reads the host object's +@step+.
  def build_contract_details
    summary = ["contract passed"]
    append_schema_details(summary)
    append_invariant_details(summary)
    summary.join(", ")
  end

  # Appends "schema: N fields" when the step exposes an output schema with at
  # least one property. A schema whose properties cannot be read counts as 0.
  def append_schema_details(parts)
    schema = @step.output_schema if @step.respond_to?(:output_schema)
    return unless schema

    fields =
      begin
        schema.properties.size
      rescue StandardError
        0
      end
    return unless fields.positive?

    parts << "schema: #{fields} fields"
  end

  # Appends "validates: N passed", where N is the contract's invariant count
  # plus the size of the step's @class_validates ivar (read reflectively).
  def append_invariant_details(parts)
    return unless @step.respond_to?(:contract)

    declared =
      begin
        @step.contract.invariants.size
      rescue StandardError
        0
      end
    class_level = @step.instance_variable_get(:@class_validates)&.size || 0
    total = declared + class_level
    return unless total.positive?

    parts << "validates: #{total} passed"
  end
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|