ruby_llm-contract 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubycritic.yml +8 -0
- data/.simplecov +22 -0
- data/CHANGELOG.md +59 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +104 -2
- data/README.md +42 -2
- data/lib/ruby_llm/contract/concerns/context_helpers.rb +11 -10
- data/lib/ruby_llm/contract/concerns/deep_freeze.rb +13 -7
- data/lib/ruby_llm/contract/concerns/deep_symbolize.rb +15 -5
- data/lib/ruby_llm/contract/concerns/eval_host.rb +51 -7
- data/lib/ruby_llm/contract/contract/schema_validator/bound_rule.rb +85 -0
- data/lib/ruby_llm/contract/contract/schema_validator/enum_rule.rb +23 -0
- data/lib/ruby_llm/contract/contract/schema_validator/node.rb +70 -0
- data/lib/ruby_llm/contract/contract/schema_validator/object_rules.rb +66 -0
- data/lib/ruby_llm/contract/contract/schema_validator/scalar_rules.rb +22 -0
- data/lib/ruby_llm/contract/contract/schema_validator/schema_extractor.rb +23 -0
- data/lib/ruby_llm/contract/contract/schema_validator/type_rule.rb +30 -0
- data/lib/ruby_llm/contract/contract/schema_validator.rb +41 -266
- data/lib/ruby_llm/contract/contract/validator.rb +9 -0
- data/lib/ruby_llm/contract/cost_calculator.rb +41 -1
- data/lib/ruby_llm/contract/eval/case_executor.rb +52 -0
- data/lib/ruby_llm/contract/eval/case_result_builder.rb +35 -0
- data/lib/ruby_llm/contract/eval/case_scorer.rb +66 -0
- data/lib/ruby_llm/contract/eval/evaluator/exact.rb +8 -6
- data/lib/ruby_llm/contract/eval/evaluator/proc_evaluator.rb +22 -10
- data/lib/ruby_llm/contract/eval/evaluator/regex.rb +11 -8
- data/lib/ruby_llm/contract/eval/expectation_evaluator.rb +26 -0
- data/lib/ruby_llm/contract/eval/prompt_diff.rb +39 -0
- data/lib/ruby_llm/contract/eval/prompt_diff_comparator.rb +116 -0
- data/lib/ruby_llm/contract/eval/prompt_diff_presenter.rb +99 -0
- data/lib/ruby_llm/contract/eval/prompt_diff_serializer.rb +23 -0
- data/lib/ruby_llm/contract/eval/report.rb +19 -191
- data/lib/ruby_llm/contract/eval/report_presenter.rb +65 -0
- data/lib/ruby_llm/contract/eval/report_stats.rb +65 -0
- data/lib/ruby_llm/contract/eval/report_storage.rb +107 -0
- data/lib/ruby_llm/contract/eval/runner.rb +30 -207
- data/lib/ruby_llm/contract/eval/step_expectation_applier.rb +67 -0
- data/lib/ruby_llm/contract/eval/step_result_normalizer.rb +39 -0
- data/lib/ruby_llm/contract/eval.rb +13 -0
- data/lib/ruby_llm/contract/minitest.rb +116 -2
- data/lib/ruby_llm/contract/pipeline/base.rb +15 -2
- data/lib/ruby_llm/contract/rake_task.rb +20 -1
- data/lib/ruby_llm/contract/rspec/helpers.rb +91 -6
- data/lib/ruby_llm/contract/rspec/pass_eval.rb +84 -3
- data/lib/ruby_llm/contract/rspec.rb +18 -0
- data/lib/ruby_llm/contract/step/adapter_caller.rb +23 -0
- data/lib/ruby_llm/contract/step/base.rb +94 -37
- data/lib/ruby_llm/contract/step/dsl.rb +61 -16
- data/lib/ruby_llm/contract/step/input_validator.rb +34 -0
- data/lib/ruby_llm/contract/step/limit_checker.rb +28 -11
- data/lib/ruby_llm/contract/step/prompt_compiler.rb +33 -0
- data/lib/ruby_llm/contract/step/result.rb +3 -2
- data/lib/ruby_llm/contract/step/result_builder.rb +60 -0
- data/lib/ruby_llm/contract/step/retry_executor.rb +1 -0
- data/lib/ruby_llm/contract/step/runner.rb +47 -84
- data/lib/ruby_llm/contract/step/runner_config.rb +37 -0
- data/lib/ruby_llm/contract/step.rb +5 -0
- data/lib/ruby_llm/contract/version.rb +1 -1
- data/lib/ruby_llm/contract.rb +28 -0
- metadata +28 -1
|
@@ -9,7 +9,7 @@ module RubyLLM
|
|
|
9
9
|
private
|
|
10
10
|
|
|
11
11
|
def check_limits(messages)
|
|
12
|
-
return nil unless
|
|
12
|
+
return nil unless max_input || max_cost
|
|
13
13
|
|
|
14
14
|
estimated = TokenEstimator.estimate(messages)
|
|
15
15
|
errors = collect_limit_errors(estimated)
|
|
@@ -21,27 +21,44 @@ module RubyLLM
|
|
|
21
21
|
|
|
22
22
|
# Collects a message for every configured limit the estimated prompt breaks.
#
# @param estimated [Integer] estimated number of input tokens
# @return [Array<String>] limit violations; empty when nothing is exceeded
def collect_limit_errors(estimated)
  violations = []
  over_input = max_input && estimated > max_input
  violations << "Input token limit exceeded: estimated #{estimated} tokens, max #{max_input}" if over_input
  # The cost check appends to the same list, and only runs when a cost cap is set.
  append_cost_error(estimated, violations) if max_cost
  violations
end
|
|
30
30
|
|
|
31
|
+
# Default output estimate when max_output is not set.
|
|
32
|
+
# Uses input token count as a conservative proxy — most LLM responses
|
|
33
|
+
# are shorter than the input, so this overestimates slightly.
|
|
34
|
+
# Without this, output cost is zero and max_cost can be bypassed
|
|
35
|
+
# for models expensive on completion side.
|
|
36
|
+
DEFAULT_OUTPUT_RATIO = 1
|
|
37
|
+
|
|
31
38
|
# Estimates the cost of the call and records an error when it exceeds max_cost.
#
# When no explicit output cap is available, the output size is taken as
# the input estimate multiplied by DEFAULT_OUTPUT_RATIO.
#
# @param estimated [Integer] estimated number of input tokens
# @param errors [Array<String>] list that violations are appended to
def append_cost_error(estimated, errors)
  estimated_output = effective_max_output || (estimated * DEFAULT_OUTPUT_RATIO)
  token_usage = { input_tokens: estimated, output_tokens: estimated_output }
  projected_cost = CostCalculator.calculate(model_name: model_name, usage: token_usage)

  # A nil cost means the calculator returned no price for this model.
  return handle_unknown_pricing(errors) if projected_cost.nil?
  return unless projected_cost > max_cost

  errors << "Cost limit exceeded: estimated $#{format("%.6f", projected_cost)} " \
            "(#{estimated} input + #{estimated_output} output tokens), " \
            "max $#{format("%.6f", max_cost)}"
end
|
|
53
|
+
|
|
54
|
+
# Reacts to a model without pricing data while max_cost is configured.
#
# With on_unknown_pricing == :warn a warning is printed and the call may
# proceed; any other setting records an error so the call is refused.
#
# @param errors [Array<String>] list that the refusal message is appended to
def handle_unknown_pricing(errors)
  unless on_unknown_pricing == :warn
    return errors << "max_cost is set but model '#{model_name}' has no pricing data. " \
                     "Register pricing via CostCalculator.register_model or set " \
                     "on_unknown_pricing: :warn to proceed without cost checks."
  end

  warn "[ruby_llm-contract] max_cost is configured but model '#{model_name}' " \
       "has no pricing data — cost limit not enforced"
end
|
|
47
64
|
|
|
@@ -52,7 +69,7 @@ module RubyLLM
|
|
|
52
69
|
parsed_output: nil,
|
|
53
70
|
validation_errors: errors,
|
|
54
71
|
trace: Trace.new(
|
|
55
|
-
messages: messages, model:
|
|
72
|
+
messages: messages, model: model_name,
|
|
56
73
|
usage: { input_tokens: 0, output_tokens: 0, estimated_input_tokens: estimated,
|
|
57
74
|
estimate_method: :heuristic }
|
|
58
75
|
)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Step
|
|
6
|
+
# Builds and renders a step's prompt for a single input.
class PromptCompiler
  # @param prompt_block [Proc] block describing the prompt
  def initialize(prompt_block:)
    @prompt_block = prompt_block
  end

  # Builds the prompt AST from the block and renders it.
  #
  # A block that declares at least one required parameter is handed the
  # input through the builder and renders without template variables;
  # otherwise the builder gets nil and the input is exposed as variables.
  #
  # @param input [Object] the step input
  # @return [Object] whatever Prompt::Renderer.render returns
  # @raise [RubyLLM::Contract::Error] when building or rendering fails
  def call(input)
    takes_input = @prompt_block.arity >= 1
    ast = Prompt::Builder.build(input: (takes_input ? input : nil), &@prompt_block)
    variables = takes_input ? {} : static_variables(input)

    Prompt::Renderer.render(ast, variables: variables)
  rescue StandardError => error
    raise RubyLLM::Contract::Error, "Prompt build failed: #{error.class}: #{error.message}"
  end

  private

  # Template variables for a block that takes no input: always :input,
  # plus every key of a Hash input (symbolized; these win over :input).
  def static_variables(input)
    return { input: input } unless input.is_a?(Hash)

    { input: input }.merge(input.transform_keys(&:to_sym))
  end
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
@@ -4,13 +4,14 @@ module RubyLLM
|
|
|
4
4
|
module Contract
|
|
5
5
|
module Step
|
|
6
6
|
class Result
|
|
7
|
-
attr_reader :status, :raw_output, :parsed_output, :validation_errors, :trace
|
|
7
|
+
attr_reader :status, :raw_output, :parsed_output, :validation_errors, :trace, :observations
|
|
8
8
|
|
|
9
|
-
def initialize(status:, raw_output:, parsed_output:, validation_errors: [], trace: nil)
|
|
9
|
+
# Builds an immutable result.
#
# The error and observation lists are stored frozen. A list that is not
# already frozen is copied first, so the caller's own array is never
# frozen behind its back.
#
# @param status [Symbol] outcome of the step
# @param raw_output [Object] unparsed adapter output
# @param parsed_output [Object] parsed output, if any
# @param validation_errors [Array<String>] validation failures
# @param trace [Object, nil] trace data, passed through normalize_trace
# @param observations [Array] observation results
def initialize(status:, raw_output:, parsed_output:, validation_errors: [], trace: nil, observations: [])
  @status = status
  @raw_output = raw_output
  @parsed_output = parsed_output
  # nil and already-frozen lists are kept as they are (nil is always frozen).
  @validation_errors = validation_errors.frozen? ? validation_errors : validation_errors.dup.freeze
  @observations = observations.frozen? ? observations : observations.dup.freeze
  @trace = normalize_trace(trace)
  freeze
end
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Step
|
|
6
|
+
# Turns adapter responses (or adapter failures) into Step::Result objects.
class ResultBuilder
  # @param contract_definition [Object] contract passed to the validator
  # @param output_type [Object] expected output type passed to the validator
  # @param output_schema [Object, nil] schema passed to the validator
  # @param model [Object] model recorded on every trace
  # @param observers [Array] observers run against valid output
  def initialize(contract_definition:, output_type:, output_schema:, model:, observers:)
    @contract_definition = contract_definition
    @output_type = output_type
    @output_schema = output_schema
    @model = model
    @observers = observers
  end

  # Copies a failed result and attaches a trace carrying the messages and model.
  def error_result(error_result:, messages:)
    failure_trace = Trace.new(messages: messages, model: @model)

    Result.new(
      status: error_result.status,
      raw_output: error_result.raw_output,
      parsed_output: error_result.parsed_output,
      validation_errors: error_result.validation_errors,
      trace: failure_trace
    )
  end

  # Validates the response content and wraps the outcome in a Result.
  def success_result(response:, messages:, latency_ms:, input:)
    content = response.content
    outcome = Validator.validate(
      raw_output: content,
      definition: @contract_definition,
      output_type: @output_type,
      input: input,
      schema: @output_schema
    )

    Result.new(
      status: outcome[:status],
      raw_output: content,
      parsed_output: outcome[:parsed_output],
      validation_errors: outcome[:errors],
      trace: Trace.new(messages: messages, model: @model, latency_ms: latency_ms, usage: response.usage),
      observations: collect_observations(outcome, input)
    )
  end

  private

  # Observers only run when validation succeeded and at least one is registered.
  def collect_observations(outcome, input)
    if outcome[:status] == :ok && @observers.any?
      Validator.run_observations(@observers, outcome[:parsed_output], input: input)
    else
      []
    end
  end
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
@@ -32,6 +32,7 @@ module RubyLLM
|
|
|
32
32
|
Result.new(
|
|
33
33
|
status: last.status, raw_output: last.raw_output,
|
|
34
34
|
parsed_output: last.parsed_output, validation_errors: last.validation_errors,
|
|
35
|
+
observations: last.observations,
|
|
35
36
|
trace: last.trace.merge(
|
|
36
37
|
attempts: attempt_log, usage: aggregated_usage,
|
|
37
38
|
cost: total_cost, latency_ms: total_latency
|
|
@@ -8,26 +8,31 @@ module RubyLLM
|
|
|
8
8
|
|
|
9
9
|
# Captures every runner setting in a single RunnerConfig value.
def initialize(input_type:, output_type:, prompt_block:, contract_definition:,
               adapter:, model:, output_schema: nil, max_output: nil,
               max_input: nil, max_cost: nil, on_unknown_pricing: :refuse,
               temperature: nil, extra_options: {}, observers: [])
  @config = RunnerConfig.new(
    input_type:, output_type:, prompt_block:, contract_definition:,
    adapter:, model:, output_schema:, max_output:,
    max_input:, max_cost:, on_unknown_pricing:,
    temperature:, extra_options:, observers:
  )
end
|
|
25
30
|
|
|
26
31
|
def call(input)
|
|
27
|
-
validated_input =
|
|
32
|
+
validated_input = input_validator.call(input)
|
|
28
33
|
return validated_input if validated_input.is_a?(Result)
|
|
29
34
|
|
|
30
|
-
messages =
|
|
35
|
+
messages = prompt_compiler.call(input)
|
|
31
36
|
rescue RubyLLM::Contract::Error => e
|
|
32
37
|
Result.new(status: :input_error, raw_output: nil, parsed_output: nil,
|
|
33
38
|
validation_errors: [e.message])
|
|
@@ -41,94 +46,52 @@ module RubyLLM
|
|
|
41
46
|
limit_result = check_limits(messages)
|
|
42
47
|
return limit_result if limit_result
|
|
43
48
|
|
|
44
|
-
response, latency_ms =
|
|
45
|
-
return
|
|
49
|
+
response, latency_ms = adapter_caller.call(messages)
|
|
50
|
+
return result_builder.error_result(error_result: response, messages: messages) if response.is_a?(Result)
|
|
46
51
|
|
|
47
|
-
|
|
52
|
+
result_builder.success_result(response: response, messages: messages, latency_ms: latency_ms, input: input)
|
|
48
53
|
end
|
|
49
54
|
|
|
50
|
-
def
|
|
51
|
-
|
|
52
|
-
if type.is_a?(Class) && !type.respond_to?(:[])
|
|
53
|
-
raise TypeError, "#{input.inspect} is not a #{type}" unless input.is_a?(type)
|
|
54
|
-
else
|
|
55
|
-
type[input]
|
|
56
|
-
end
|
|
57
|
-
nil
|
|
58
|
-
rescue Dry::Types::CoercionError, TypeError, ArgumentError => e
|
|
59
|
-
Result.new(status: :input_error, raw_output: nil, parsed_output: nil, validation_errors: [e.message])
|
|
55
|
+
# Builds a new InputValidator for the configured input type.
def input_validator
  expected_type = @config.input_type
  InputValidator.new(input_type: expected_type)
end
|
|
61
58
|
|
|
62
|
-
def
|
|
63
|
-
|
|
64
|
-
ast = Prompt::Builder.build(input: dynamic ? input : nil, &@prompt_block)
|
|
65
|
-
|
|
66
|
-
Prompt::Renderer.render(ast, variables: dynamic ? {} : template_variables_for(input))
|
|
67
|
-
rescue StandardError => e
|
|
68
|
-
raise RubyLLM::Contract::Error, "Prompt build failed: #{e.class}: #{e.message}"
|
|
59
|
+
# Builds a new PromptCompiler around the configured prompt block.
def prompt_compiler
  block = @config.prompt_block
  PromptCompiler.new(prompt_block: block)
end
|
|
70
62
|
|
|
71
|
-
def
|
|
72
|
-
|
|
73
|
-
input.is_a?(Hash) ? base.merge(input.transform_keys(&:to_sym)) : base
|
|
63
|
+
# Builds a new AdapterCaller from the configured adapter and its options.
def adapter_caller
  config = @config
  AdapterCaller.new(adapter: config.adapter, adapter_options: config.adapter_options)
end
|
|
75
66
|
|
|
76
|
-
def
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
67
|
+
# Builds a new ResultBuilder from the validation-related settings.
def result_builder
  config = @config
  ResultBuilder.new(
    contract_definition: config.contract_definition,
    output_type: config.output_type,
    output_schema: config.output_schema,
    model: config.model,
    observers: config.observers
  )
end
|
|
84
76
|
|
|
85
|
-
def
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
{ model: @model }.tap do |opts|
|
|
89
|
-
opts[:schema] = @output_schema if @output_schema
|
|
90
|
-
opts[:max_tokens] = effective_max_tokens if effective_max_tokens
|
|
91
|
-
opts[:temperature] = @temperature if @temperature
|
|
92
|
-
@extra_options.each { |k, v| opts[k] = v unless opts.key?(k) }
|
|
93
|
-
end
|
|
77
|
+
# Configured input-token limit (nil when unset).
def max_input = @config.max_input
|
|
95
80
|
|
|
96
|
-
def
|
|
97
|
-
@
|
|
81
|
+
# Configured cost limit (nil when unset).
def max_cost = @config.max_cost
|
|
99
84
|
|
|
100
|
-
def
|
|
101
|
-
|
|
102
|
-
status: error_result.status,
|
|
103
|
-
raw_output: error_result.raw_output,
|
|
104
|
-
parsed_output: error_result.parsed_output,
|
|
105
|
-
validation_errors: error_result.validation_errors,
|
|
106
|
-
trace: Trace.new(messages: messages, model: @model)
|
|
107
|
-
)
|
|
85
|
+
# Model taken from the runner configuration.
def model_name = @config.model
|
|
109
88
|
|
|
110
|
-
def
|
|
111
|
-
|
|
112
|
-
validation_result = validate_output(raw_output, input)
|
|
113
|
-
trace = Trace.new(messages: messages, model: @model, latency_ms: latency_ms, usage: response.usage)
|
|
114
|
-
|
|
115
|
-
Result.new(
|
|
116
|
-
status: validation_result[:status],
|
|
117
|
-
raw_output: raw_output,
|
|
118
|
-
parsed_output: validation_result[:parsed_output],
|
|
119
|
-
validation_errors: validation_result[:errors],
|
|
120
|
-
trace: trace
|
|
121
|
-
)
|
|
89
|
+
# Configured policy for models without pricing data.
def on_unknown_pricing = @config.on_unknown_pricing
|
|
123
92
|
|
|
124
|
-
def
|
|
125
|
-
|
|
126
|
-
raw_output: raw_output,
|
|
127
|
-
definition: @contract_definition,
|
|
128
|
-
output_type: @output_type,
|
|
129
|
-
input: input,
|
|
130
|
-
schema: @output_schema
|
|
131
|
-
)
|
|
93
|
+
# Output-token cap as resolved by the runner configuration.
def effective_max_output = @config.effective_max_output
|
|
133
96
|
end
|
|
134
97
|
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Step
|
|
6
|
+
# Immutable bundle of the settings a step runner is constructed with.
RunnerConfig = Data.define(
  :input_type,
  :output_type,
  :prompt_block,
  :contract_definition,
  :adapter,
  :model,
  :output_schema,
  :max_output,
  :max_input,
  :max_cost,
  :on_unknown_pricing,
  :temperature,
  :extra_options,
  :observers
) do
  # Output-token cap: an explicit :max_tokens in extra_options wins over max_output.
  #
  # @return [Object, nil] the cap, or nil when neither is set
  def effective_max_output
    extra_options.to_h[:max_tokens] || max_output
  end

  # Options hash handed to the adapter.
  #
  # Always carries :model; :schema, :max_tokens and :temperature are added
  # only when set. Extra options fill in keys that are not already present
  # and never override the derived ones.
  #
  # @return [Hash]
  def adapter_options
    output_cap = effective_max_output

    { model: model }.tap do |options|
      options[:schema] = output_schema if output_schema
      options[:max_tokens] = output_cap if output_cap
      options[:temperature] = temperature if temperature
      # nil.to_h is {}, so a nil extra_options is treated as "no extras".
      extra_options.to_h.each { |key, value| options[key] = value unless options.key?(key) }
    end
  end
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
@@ -3,6 +3,11 @@
|
|
|
3
3
|
require_relative "step/trace"
|
|
4
4
|
require_relative "step/result"
|
|
5
5
|
require_relative "step/limit_checker"
|
|
6
|
+
require_relative "step/runner_config"
|
|
7
|
+
require_relative "step/input_validator"
|
|
8
|
+
require_relative "step/prompt_compiler"
|
|
9
|
+
require_relative "step/adapter_caller"
|
|
10
|
+
require_relative "step/result_builder"
|
|
6
11
|
require_relative "step/runner"
|
|
7
12
|
require_relative "step/retry_policy"
|
|
8
13
|
require_relative "step/retry_executor"
|
data/lib/ruby_llm/contract.rb
CHANGED
|
@@ -18,6 +18,7 @@ module RubyLLM
|
|
|
18
18
|
|
|
19
19
|
# Replaces the configuration with a fresh one and empties the adapter
# override map returned by step_adapter_overrides.
def reset_configuration!
  fresh_configuration = Configuration.new
  @configuration = fresh_configuration
  step_adapter_overrides.clear
end
|
|
22
23
|
|
|
23
24
|
# --- Eval host registry ---
|
|
@@ -40,6 +41,15 @@ module RubyLLM
|
|
|
40
41
|
@eval_hosts = []
|
|
41
42
|
end
|
|
42
43
|
|
|
44
|
+
# Per-step adapter overrides used by test helpers (RSpec + Minitest).
#
# The map is stored with Thread#[], which Ruby scopes to the current
# fiber: it is not shared with other threads, nor with other fibers
# running on the same thread. It is created lazily on first access.
#
# @return [Hash] the override map for the current fiber
def step_adapter_overrides
  Thread.current[:ruby_llm_contract_step_overrides] ||= {}
end

# Replaces the override map for the current fiber.
#
# @param map [Hash, nil] new override map; nil makes the next read start from an empty map
def step_adapter_overrides=(map)
  Thread.current[:ruby_llm_contract_step_overrides] = map
end
|
|
52
|
+
|
|
43
53
|
def load_evals!(*dirs)
|
|
44
54
|
dirs = dirs.flatten.compact
|
|
45
55
|
if dirs.empty? && defined?(::Rails)
|
|
@@ -102,6 +112,21 @@ module RubyLLM
|
|
|
102
112
|
nil
|
|
103
113
|
end
|
|
104
114
|
end
|
|
115
|
+
|
|
116
|
+
# Prepended once onto Step::Base's singleton class so that `run` consults
# the override map before falling through to the normal adapter resolution.
# Used by both RSpec and Minitest test helpers.
module StepAdapterOverride
  # Injects the registered override adapter for this step, if there is one.
  #
  # An adapter passed explicitly in the context (symbol or string key)
  # always wins over a registered override.
  #
  # @param input [Object] step input, forwarded unchanged
  # @param context [Hash, nil] run context; nil is treated as empty
  def run(input, context: {})
    context ||= {}
    overrides = RubyLLM::Contract.step_adapter_overrides
    unless overrides.empty? || context.key?(:adapter) || context.key?("adapter")
      # Overrides are looked up by the receiver itself (the step class `run` was called on).
      override = overrides[self]
      context = context.merge(adapter: override) if override
    end
    super(input, context: context)
  end
end
|
|
105
130
|
end
|
|
106
131
|
end
|
|
107
132
|
|
|
@@ -126,3 +151,6 @@ require_relative "contract/pipeline"
|
|
|
126
151
|
require_relative "contract/eval"
|
|
127
152
|
require_relative "contract/dsl"
|
|
128
153
|
require_relative "contract/railtie" if defined?(Rails::Railtie)
|
|
154
|
+
|
|
155
|
+
# Prepend after Step::Base is loaded
|
|
156
|
+
RubyLLM::Contract::Step::Base.singleton_class.prepend(RubyLLM::Contract::StepAdapterOverride)
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ruby_llm-contract
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Justyna
|
|
@@ -60,6 +60,8 @@ extra_rdoc_files: []
|
|
|
60
60
|
files:
|
|
61
61
|
- ".rspec"
|
|
62
62
|
- ".rubocop.yml"
|
|
63
|
+
- ".rubycritic.yml"
|
|
64
|
+
- ".simplecov"
|
|
63
65
|
- CHANGELOG.md
|
|
64
66
|
- Gemfile
|
|
65
67
|
- Gemfile.lock
|
|
@@ -95,13 +97,23 @@ files:
|
|
|
95
97
|
- lib/ruby_llm/contract/contract/invariant.rb
|
|
96
98
|
- lib/ruby_llm/contract/contract/parser.rb
|
|
97
99
|
- lib/ruby_llm/contract/contract/schema_validator.rb
|
|
100
|
+
- lib/ruby_llm/contract/contract/schema_validator/bound_rule.rb
|
|
101
|
+
- lib/ruby_llm/contract/contract/schema_validator/enum_rule.rb
|
|
102
|
+
- lib/ruby_llm/contract/contract/schema_validator/node.rb
|
|
103
|
+
- lib/ruby_llm/contract/contract/schema_validator/object_rules.rb
|
|
104
|
+
- lib/ruby_llm/contract/contract/schema_validator/scalar_rules.rb
|
|
105
|
+
- lib/ruby_llm/contract/contract/schema_validator/schema_extractor.rb
|
|
106
|
+
- lib/ruby_llm/contract/contract/schema_validator/type_rule.rb
|
|
98
107
|
- lib/ruby_llm/contract/contract/validator.rb
|
|
99
108
|
- lib/ruby_llm/contract/cost_calculator.rb
|
|
100
109
|
- lib/ruby_llm/contract/dsl.rb
|
|
101
110
|
- lib/ruby_llm/contract/errors.rb
|
|
102
111
|
- lib/ruby_llm/contract/eval.rb
|
|
103
112
|
- lib/ruby_llm/contract/eval/baseline_diff.rb
|
|
113
|
+
- lib/ruby_llm/contract/eval/case_executor.rb
|
|
104
114
|
- lib/ruby_llm/contract/eval/case_result.rb
|
|
115
|
+
- lib/ruby_llm/contract/eval/case_result_builder.rb
|
|
116
|
+
- lib/ruby_llm/contract/eval/case_scorer.rb
|
|
105
117
|
- lib/ruby_llm/contract/eval/contract_detail_builder.rb
|
|
106
118
|
- lib/ruby_llm/contract/eval/dataset.rb
|
|
107
119
|
- lib/ruby_llm/contract/eval/eval_definition.rb
|
|
@@ -111,10 +123,20 @@ files:
|
|
|
111
123
|
- lib/ruby_llm/contract/eval/evaluator/json_includes.rb
|
|
112
124
|
- lib/ruby_llm/contract/eval/evaluator/proc_evaluator.rb
|
|
113
125
|
- lib/ruby_llm/contract/eval/evaluator/regex.rb
|
|
126
|
+
- lib/ruby_llm/contract/eval/expectation_evaluator.rb
|
|
114
127
|
- lib/ruby_llm/contract/eval/model_comparison.rb
|
|
115
128
|
- lib/ruby_llm/contract/eval/pipeline_result_adapter.rb
|
|
129
|
+
- lib/ruby_llm/contract/eval/prompt_diff.rb
|
|
130
|
+
- lib/ruby_llm/contract/eval/prompt_diff_comparator.rb
|
|
131
|
+
- lib/ruby_llm/contract/eval/prompt_diff_presenter.rb
|
|
132
|
+
- lib/ruby_llm/contract/eval/prompt_diff_serializer.rb
|
|
116
133
|
- lib/ruby_llm/contract/eval/report.rb
|
|
134
|
+
- lib/ruby_llm/contract/eval/report_presenter.rb
|
|
135
|
+
- lib/ruby_llm/contract/eval/report_stats.rb
|
|
136
|
+
- lib/ruby_llm/contract/eval/report_storage.rb
|
|
117
137
|
- lib/ruby_llm/contract/eval/runner.rb
|
|
138
|
+
- lib/ruby_llm/contract/eval/step_expectation_applier.rb
|
|
139
|
+
- lib/ruby_llm/contract/eval/step_result_normalizer.rb
|
|
118
140
|
- lib/ruby_llm/contract/eval/trait_evaluator.rb
|
|
119
141
|
- lib/ruby_llm/contract/minitest.rb
|
|
120
142
|
- lib/ruby_llm/contract/pipeline.rb
|
|
@@ -139,13 +161,18 @@ files:
|
|
|
139
161
|
- lib/ruby_llm/contract/rspec/pass_eval.rb
|
|
140
162
|
- lib/ruby_llm/contract/rspec/satisfy_contract.rb
|
|
141
163
|
- lib/ruby_llm/contract/step.rb
|
|
164
|
+
- lib/ruby_llm/contract/step/adapter_caller.rb
|
|
142
165
|
- lib/ruby_llm/contract/step/base.rb
|
|
143
166
|
- lib/ruby_llm/contract/step/dsl.rb
|
|
167
|
+
- lib/ruby_llm/contract/step/input_validator.rb
|
|
144
168
|
- lib/ruby_llm/contract/step/limit_checker.rb
|
|
169
|
+
- lib/ruby_llm/contract/step/prompt_compiler.rb
|
|
145
170
|
- lib/ruby_llm/contract/step/result.rb
|
|
171
|
+
- lib/ruby_llm/contract/step/result_builder.rb
|
|
146
172
|
- lib/ruby_llm/contract/step/retry_executor.rb
|
|
147
173
|
- lib/ruby_llm/contract/step/retry_policy.rb
|
|
148
174
|
- lib/ruby_llm/contract/step/runner.rb
|
|
175
|
+
- lib/ruby_llm/contract/step/runner_config.rb
|
|
149
176
|
- lib/ruby_llm/contract/step/trace.rb
|
|
150
177
|
- lib/ruby_llm/contract/token_estimator.rb
|
|
151
178
|
- lib/ruby_llm/contract/types.rb
|