ruby_llm-contract 0.4.5 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubycritic.yml +8 -0
- data/.simplecov +22 -0
- data/CHANGELOG.md +19 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +104 -2
- data/README.md +42 -2
- data/lib/ruby_llm/contract/concerns/context_helpers.rb +11 -10
- data/lib/ruby_llm/contract/concerns/deep_freeze.rb +13 -7
- data/lib/ruby_llm/contract/concerns/deep_symbolize.rb +15 -5
- data/lib/ruby_llm/contract/concerns/eval_host.rb +51 -7
- data/lib/ruby_llm/contract/contract/schema_validator/bound_rule.rb +85 -0
- data/lib/ruby_llm/contract/contract/schema_validator/enum_rule.rb +23 -0
- data/lib/ruby_llm/contract/contract/schema_validator/node.rb +70 -0
- data/lib/ruby_llm/contract/contract/schema_validator/object_rules.rb +66 -0
- data/lib/ruby_llm/contract/contract/schema_validator/scalar_rules.rb +22 -0
- data/lib/ruby_llm/contract/contract/schema_validator/schema_extractor.rb +23 -0
- data/lib/ruby_llm/contract/contract/schema_validator/type_rule.rb +30 -0
- data/lib/ruby_llm/contract/contract/schema_validator.rb +41 -266
- data/lib/ruby_llm/contract/contract/validator.rb +9 -0
- data/lib/ruby_llm/contract/eval/case_executor.rb +52 -0
- data/lib/ruby_llm/contract/eval/case_result_builder.rb +35 -0
- data/lib/ruby_llm/contract/eval/case_scorer.rb +66 -0
- data/lib/ruby_llm/contract/eval/evaluator/exact.rb +8 -6
- data/lib/ruby_llm/contract/eval/evaluator/proc_evaluator.rb +22 -10
- data/lib/ruby_llm/contract/eval/evaluator/regex.rb +11 -8
- data/lib/ruby_llm/contract/eval/expectation_evaluator.rb +26 -0
- data/lib/ruby_llm/contract/eval/prompt_diff.rb +39 -0
- data/lib/ruby_llm/contract/eval/prompt_diff_comparator.rb +116 -0
- data/lib/ruby_llm/contract/eval/prompt_diff_presenter.rb +99 -0
- data/lib/ruby_llm/contract/eval/prompt_diff_serializer.rb +23 -0
- data/lib/ruby_llm/contract/eval/report.rb +19 -191
- data/lib/ruby_llm/contract/eval/report_presenter.rb +65 -0
- data/lib/ruby_llm/contract/eval/report_stats.rb +65 -0
- data/lib/ruby_llm/contract/eval/report_storage.rb +107 -0
- data/lib/ruby_llm/contract/eval/runner.rb +30 -207
- data/lib/ruby_llm/contract/eval/step_expectation_applier.rb +67 -0
- data/lib/ruby_llm/contract/eval/step_result_normalizer.rb +39 -0
- data/lib/ruby_llm/contract/eval.rb +13 -0
- data/lib/ruby_llm/contract/pipeline/base.rb +10 -1
- data/lib/ruby_llm/contract/rspec/pass_eval.rb +84 -3
- data/lib/ruby_llm/contract/rspec.rb +5 -0
- data/lib/ruby_llm/contract/step/adapter_caller.rb +23 -0
- data/lib/ruby_llm/contract/step/base.rb +93 -38
- data/lib/ruby_llm/contract/step/dsl.rb +10 -0
- data/lib/ruby_llm/contract/step/input_validator.rb +34 -0
- data/lib/ruby_llm/contract/step/limit_checker.rb +11 -11
- data/lib/ruby_llm/contract/step/prompt_compiler.rb +33 -0
- data/lib/ruby_llm/contract/step/result.rb +3 -2
- data/lib/ruby_llm/contract/step/result_builder.rb +60 -0
- data/lib/ruby_llm/contract/step/retry_executor.rb +1 -0
- data/lib/ruby_llm/contract/step/runner.rb +46 -85
- data/lib/ruby_llm/contract/step/runner_config.rb +37 -0
- data/lib/ruby_llm/contract/step.rb +5 -0
- data/lib/ruby_llm/contract/version.rb +1 -1
- metadata +28 -1
|
@@ -4,6 +4,8 @@ module RubyLLM
|
|
|
4
4
|
module Contract
|
|
5
5
|
module Step
|
|
6
6
|
class Base
|
|
7
|
+
DEFAULT_OUTPUT_TOKENS = 256
|
|
8
|
+
|
|
7
9
|
def self.inherited(subclass)
|
|
8
10
|
super
|
|
9
11
|
Contract.register_eval_host(subclass) if respond_to?(:eval_defined?) && eval_defined?
|
|
@@ -15,30 +17,23 @@ module RubyLLM
|
|
|
15
17
|
include Dsl
|
|
16
18
|
|
|
17
19
|
def eval_case(input:, expected: nil, expected_traits: nil, evaluator: nil, context: {})
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
expected_traits: expected_traits, evaluator: evaluator)
|
|
21
|
-
end
|
|
22
|
-
report = Eval::Runner.run(step: self, dataset: dataset, context: context)
|
|
23
|
-
report.results.first
|
|
20
|
+
Eval::Runner.run(step: self, dataset: inline_dataset(input, expected, expected_traits, evaluator),
|
|
21
|
+
context: context).results.first
|
|
24
22
|
end
|
|
25
23
|
|
|
26
24
|
def estimate_cost(input:, model: nil)
|
|
27
|
-
model_name =
|
|
28
|
-
messages = build_messages(input)
|
|
29
|
-
input_tokens = TokenEstimator.estimate(messages)
|
|
30
|
-
output_tokens = max_output || 256 # conservative default
|
|
31
|
-
|
|
25
|
+
model_name = estimated_model_name(model)
|
|
32
26
|
model_info = CostCalculator.send(:find_model, model_name)
|
|
33
27
|
return nil unless model_info
|
|
34
28
|
|
|
35
|
-
|
|
36
|
-
|
|
29
|
+
input_tokens = TokenEstimator.estimate(build_messages(input))
|
|
30
|
+
output_tokens = max_output || DEFAULT_OUTPUT_TOKENS
|
|
31
|
+
|
|
37
32
|
{
|
|
38
33
|
model: model_name,
|
|
39
34
|
input_tokens: input_tokens,
|
|
40
35
|
output_tokens_estimate: output_tokens,
|
|
41
|
-
estimated_cost:
|
|
36
|
+
estimated_cost: estimated_cost_for(model_info, input_tokens, output_tokens)
|
|
42
37
|
}
|
|
43
38
|
end
|
|
44
39
|
|
|
@@ -46,16 +41,11 @@ module RubyLLM
|
|
|
46
41
|
defn = send(:all_eval_definitions)[eval_name.to_s]
|
|
47
42
|
raise ArgumentError, "No eval '#{eval_name}' defined" unless defn
|
|
48
43
|
|
|
49
|
-
|
|
50
|
-
model_list = models || [step_model || RubyLLM::Contract.configuration.default_model].compact
|
|
44
|
+
model_list = models || [estimated_model_name].compact
|
|
51
45
|
cases = defn.build_dataset.cases
|
|
52
46
|
|
|
53
47
|
model_list.each_with_object({}) do |model_name, result|
|
|
54
|
-
|
|
55
|
-
est = estimate_cost(input: c.input, model: model_name)
|
|
56
|
-
est ? est[:estimated_cost] : 0.0
|
|
57
|
-
end
|
|
58
|
-
result[model_name] = per_case.round(6)
|
|
48
|
+
result[model_name] = estimate_eval_cost_for_model(cases, model_name)
|
|
59
49
|
end
|
|
60
50
|
end
|
|
61
51
|
|
|
@@ -66,20 +56,8 @@ module RubyLLM
|
|
|
66
56
|
def run(input, context: {})
|
|
67
57
|
context = safe_context(context)
|
|
68
58
|
warn_unknown_context_keys(context)
|
|
69
|
-
adapter = resolve_adapter(context)
|
|
70
|
-
default_model = context[:model] || model || RubyLLM::Contract.configuration.default_model
|
|
71
|
-
policy = retry_policy
|
|
72
|
-
|
|
73
|
-
ctx_temp = context[:temperature]
|
|
74
|
-
extra = context.slice(:provider, :assume_model_exists, :max_tokens)
|
|
75
|
-
result = if policy
|
|
76
|
-
run_with_retry(input, adapter: adapter, default_model: default_model,
|
|
77
|
-
policy: policy, context_temperature: ctx_temp, extra_options: extra)
|
|
78
|
-
else
|
|
79
|
-
run_once(input, adapter: adapter, model: default_model,
|
|
80
|
-
context_temperature: ctx_temp, extra_options: extra)
|
|
81
|
-
end
|
|
82
59
|
|
|
60
|
+
result = dispatch_run(input, context)
|
|
83
61
|
log_result(result)
|
|
84
62
|
invoke_around_call(input, result)
|
|
85
63
|
end
|
|
@@ -88,13 +66,43 @@ module RubyLLM
|
|
|
88
66
|
dynamic = prompt.arity >= 1
|
|
89
67
|
builder_input = dynamic ? input : Prompt::Builder::NOT_PROVIDED
|
|
90
68
|
ast = Prompt::Builder.build(input: builder_input, &prompt)
|
|
91
|
-
variables
|
|
92
|
-
variables.merge!(input.transform_keys(&:to_sym)) if !dynamic && input.is_a?(Hash)
|
|
93
|
-
Prompt::Renderer.render(ast, variables: variables)
|
|
69
|
+
Prompt::Renderer.render(ast, variables: prompt_variables(input, dynamic))
|
|
94
70
|
end
|
|
95
71
|
|
|
96
72
|
private
|
|
97
73
|
|
|
74
|
+
def inline_dataset(input, expected, expected_traits, evaluator)
|
|
75
|
+
Eval::Dataset.define("single_case") do
|
|
76
|
+
add_case("inline", input: input, expected: expected,
|
|
77
|
+
expected_traits: expected_traits, evaluator: evaluator)
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def estimated_model_name(model = nil)
|
|
82
|
+
model || (self.model if respond_to?(:model)) || RubyLLM::Contract.configuration.default_model
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def estimated_cost_for(model_info, input_tokens, output_tokens)
|
|
86
|
+
CostCalculator.send(
|
|
87
|
+
:compute_cost,
|
|
88
|
+
model_info,
|
|
89
|
+
{ input_tokens: input_tokens, output_tokens: output_tokens }
|
|
90
|
+
)
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def estimate_eval_cost_for_model(cases, model_name)
|
|
94
|
+
cases.sum do |test_case|
|
|
95
|
+
estimate = estimate_cost(input: test_case.input, model: model_name)
|
|
96
|
+
estimate ? estimate[:estimated_cost] : 0.0
|
|
97
|
+
end.round(6)
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
def prompt_variables(input, dynamic)
|
|
101
|
+
variables = dynamic ? {} : { input: input }
|
|
102
|
+
variables.merge!(input.transform_keys(&:to_sym)) if !dynamic && input.is_a?(Hash)
|
|
103
|
+
variables
|
|
104
|
+
end
|
|
105
|
+
|
|
98
106
|
def warn_unknown_context_keys(context)
|
|
99
107
|
unknown = context.keys - KNOWN_CONTEXT_KEYS
|
|
100
108
|
return if unknown.empty?
|
|
@@ -103,6 +111,39 @@ module RubyLLM
|
|
|
103
111
|
"Known keys: #{KNOWN_CONTEXT_KEYS.inspect}"
|
|
104
112
|
end
|
|
105
113
|
|
|
114
|
+
def dispatch_run(input, context)
|
|
115
|
+
adapter = resolve_adapter(context)
|
|
116
|
+
runtime = runtime_settings(context)
|
|
117
|
+
|
|
118
|
+
if runtime[:policy]
|
|
119
|
+
run_with_retry(
|
|
120
|
+
input,
|
|
121
|
+
adapter: adapter,
|
|
122
|
+
default_model: runtime[:model],
|
|
123
|
+
policy: runtime[:policy],
|
|
124
|
+
context_temperature: runtime[:temperature],
|
|
125
|
+
extra_options: runtime[:extra_options]
|
|
126
|
+
)
|
|
127
|
+
else
|
|
128
|
+
run_once(
|
|
129
|
+
input,
|
|
130
|
+
adapter: adapter,
|
|
131
|
+
model: runtime[:model],
|
|
132
|
+
context_temperature: runtime[:temperature],
|
|
133
|
+
extra_options: runtime[:extra_options]
|
|
134
|
+
)
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
def runtime_settings(context)
|
|
139
|
+
{
|
|
140
|
+
model: context[:model] || model || RubyLLM::Contract.configuration.default_model,
|
|
141
|
+
temperature: context[:temperature],
|
|
142
|
+
extra_options: context.slice(:provider, :assume_model_exists, :max_tokens),
|
|
143
|
+
policy: retry_policy
|
|
144
|
+
}
|
|
145
|
+
end
|
|
146
|
+
|
|
106
147
|
def resolve_adapter(context)
|
|
107
148
|
adapter = context[:adapter] || RubyLLM::Contract.configuration.default_adapter
|
|
108
149
|
return adapter if adapter
|
|
@@ -119,7 +160,8 @@ module RubyLLM
|
|
|
119
160
|
adapter: adapter, model: model, output_schema: output_schema,
|
|
120
161
|
max_output: max_output, max_input: max_input, max_cost: max_cost,
|
|
121
162
|
on_unknown_pricing: on_unknown_pricing,
|
|
122
|
-
temperature: effective_temp, extra_options: extra_options
|
|
163
|
+
temperature: effective_temp, extra_options: extra_options,
|
|
164
|
+
observers: class_observers
|
|
123
165
|
).call(input)
|
|
124
166
|
rescue ArgumentError => e
|
|
125
167
|
Result.new(status: :input_error, raw_output: nil, parsed_output: nil,
|
|
@@ -137,6 +179,19 @@ module RubyLLM
|
|
|
137
179
|
"tokens=#{trace.usage&.dig(:input_tokens) || 0}+#{trace.usage&.dig(:output_tokens) || 0} " \
|
|
138
180
|
"cost=$#{format("%.6f", trace.cost || 0)}"
|
|
139
181
|
logger.info(msg)
|
|
182
|
+
|
|
183
|
+
log_failed_observations(result, logger)
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
def log_failed_observations(result, logger)
|
|
187
|
+
failed = result.observations.select { |o| !o[:passed] }
|
|
188
|
+
return if failed.empty?
|
|
189
|
+
|
|
190
|
+
failed.each do |obs|
|
|
191
|
+
msg = "[ruby_llm-contract] #{name || self} observation failed: #{obs[:description]}"
|
|
192
|
+
msg += " (#{obs[:error]})" if obs[:error]
|
|
193
|
+
logger.warn(msg)
|
|
194
|
+
end
|
|
140
195
|
end
|
|
141
196
|
|
|
142
197
|
def invoke_around_call(input, result)
|
|
@@ -79,6 +79,16 @@ module RubyLLM
|
|
|
79
79
|
inherited + own
|
|
80
80
|
end
|
|
81
81
|
|
|
82
|
+
def observe(description, &block)
|
|
83
|
+
(@class_observers ||= []) << Invariant.new(description, block)
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def class_observers
|
|
87
|
+
own = defined?(@class_observers) ? @class_observers : []
|
|
88
|
+
inherited = superclass.respond_to?(:class_observers) ? superclass.class_observers : []
|
|
89
|
+
inherited + own
|
|
90
|
+
end
|
|
91
|
+
|
|
82
92
|
def max_output(tokens = nil)
|
|
83
93
|
if tokens
|
|
84
94
|
unless tokens.is_a?(Numeric) && tokens.positive?
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Step
|
|
6
|
+
class InputValidator
|
|
7
|
+
def initialize(input_type:)
|
|
8
|
+
@input_type = input_type
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def call(input)
|
|
12
|
+
validate(input)
|
|
13
|
+
nil
|
|
14
|
+
rescue Dry::Types::CoercionError, TypeError, ArgumentError => error
|
|
15
|
+
Result.new(status: :input_error, raw_output: nil, parsed_output: nil, validation_errors: [error.message])
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
private
|
|
19
|
+
|
|
20
|
+
def validate(input)
|
|
21
|
+
if ruby_class_input?
|
|
22
|
+
raise TypeError, "#{input.inspect} is not a #{@input_type}" unless input.is_a?(@input_type)
|
|
23
|
+
else
|
|
24
|
+
@input_type[input]
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def ruby_class_input?
|
|
29
|
+
@input_type.is_a?(Class) && !@input_type.respond_to?(:[])
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
@@ -9,7 +9,7 @@ module RubyLLM
|
|
|
9
9
|
private
|
|
10
10
|
|
|
11
11
|
def check_limits(messages)
|
|
12
|
-
return nil unless
|
|
12
|
+
return nil unless max_input || max_cost
|
|
13
13
|
|
|
14
14
|
estimated = TokenEstimator.estimate(messages)
|
|
15
15
|
errors = collect_limit_errors(estimated)
|
|
@@ -21,10 +21,10 @@ module RubyLLM
|
|
|
21
21
|
|
|
22
22
|
def collect_limit_errors(estimated)
|
|
23
23
|
errors = []
|
|
24
|
-
if
|
|
25
|
-
errors << "Input token limit exceeded: estimated #{estimated} tokens, max #{
|
|
24
|
+
if max_input && estimated > max_input
|
|
25
|
+
errors << "Input token limit exceeded: estimated #{estimated} tokens, max #{max_input}"
|
|
26
26
|
end
|
|
27
|
-
append_cost_error(estimated, errors) if
|
|
27
|
+
append_cost_error(estimated, errors) if max_cost
|
|
28
28
|
errors
|
|
29
29
|
end
|
|
30
30
|
|
|
@@ -38,25 +38,25 @@ module RubyLLM
|
|
|
38
38
|
def append_cost_error(estimated, errors)
|
|
39
39
|
estimated_output = effective_max_output || (estimated * DEFAULT_OUTPUT_RATIO)
|
|
40
40
|
estimated_cost = CostCalculator.calculate(
|
|
41
|
-
model_name:
|
|
41
|
+
model_name: model_name,
|
|
42
42
|
usage: { input_tokens: estimated, output_tokens: estimated_output }
|
|
43
43
|
)
|
|
44
44
|
|
|
45
45
|
if estimated_cost.nil?
|
|
46
46
|
handle_unknown_pricing(errors)
|
|
47
|
-
elsif estimated_cost >
|
|
47
|
+
elsif estimated_cost > max_cost
|
|
48
48
|
errors << "Cost limit exceeded: estimated $#{format("%.6f", estimated_cost)} " \
|
|
49
49
|
"(#{estimated} input + #{estimated_output} output tokens), " \
|
|
50
|
-
"max $#{format("%.6f",
|
|
50
|
+
"max $#{format("%.6f", max_cost)}"
|
|
51
51
|
end
|
|
52
52
|
end
|
|
53
53
|
|
|
54
54
|
def handle_unknown_pricing(errors)
|
|
55
|
-
if
|
|
56
|
-
warn "[ruby_llm-contract] max_cost is configured but model '#{
|
|
55
|
+
if on_unknown_pricing == :warn
|
|
56
|
+
warn "[ruby_llm-contract] max_cost is configured but model '#{model_name}' " \
|
|
57
57
|
"has no pricing data — cost limit not enforced"
|
|
58
58
|
else
|
|
59
|
-
errors << "max_cost is set but model '#{
|
|
59
|
+
errors << "max_cost is set but model '#{model_name}' has no pricing data. " \
|
|
60
60
|
"Register pricing via CostCalculator.register_model or set " \
|
|
61
61
|
"on_unknown_pricing: :warn to proceed without cost checks."
|
|
62
62
|
end
|
|
@@ -69,7 +69,7 @@ module RubyLLM
|
|
|
69
69
|
parsed_output: nil,
|
|
70
70
|
validation_errors: errors,
|
|
71
71
|
trace: Trace.new(
|
|
72
|
-
messages: messages, model:
|
|
72
|
+
messages: messages, model: model_name,
|
|
73
73
|
usage: { input_tokens: 0, output_tokens: 0, estimated_input_tokens: estimated,
|
|
74
74
|
estimate_method: :heuristic }
|
|
75
75
|
)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Step
|
|
6
|
+
class PromptCompiler
|
|
7
|
+
def initialize(prompt_block:)
|
|
8
|
+
@prompt_block = prompt_block
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def call(input)
|
|
12
|
+
dynamic_prompt = @prompt_block.arity >= 1
|
|
13
|
+
builder_input = dynamic_prompt ? input : nil
|
|
14
|
+
ast = Prompt::Builder.build(input: builder_input, &@prompt_block)
|
|
15
|
+
|
|
16
|
+
Prompt::Renderer.render(ast, variables: template_variables_for(input, dynamic_prompt))
|
|
17
|
+
rescue StandardError => error
|
|
18
|
+
raise RubyLLM::Contract::Error, "Prompt build failed: #{error.class}: #{error.message}"
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
private
|
|
22
|
+
|
|
23
|
+
def template_variables_for(input, dynamic_prompt)
|
|
24
|
+
return {} if dynamic_prompt
|
|
25
|
+
|
|
26
|
+
{ input: input }.tap do |variables|
|
|
27
|
+
variables.merge!(input.transform_keys(&:to_sym)) if input.is_a?(Hash)
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
@@ -4,13 +4,14 @@ module RubyLLM
|
|
|
4
4
|
module Contract
|
|
5
5
|
module Step
|
|
6
6
|
class Result
|
|
7
|
-
attr_reader :status, :raw_output, :parsed_output, :validation_errors, :trace
|
|
7
|
+
attr_reader :status, :raw_output, :parsed_output, :validation_errors, :trace, :observations
|
|
8
8
|
|
|
9
|
-
def initialize(status:, raw_output:, parsed_output:, validation_errors: [], trace: nil)
|
|
9
|
+
def initialize(status:, raw_output:, parsed_output:, validation_errors: [], trace: nil, observations: [])
|
|
10
10
|
@status = status
|
|
11
11
|
@raw_output = raw_output
|
|
12
12
|
@parsed_output = parsed_output
|
|
13
13
|
@validation_errors = validation_errors.freeze
|
|
14
|
+
@observations = observations.freeze
|
|
14
15
|
@trace = normalize_trace(trace)
|
|
15
16
|
freeze
|
|
16
17
|
end
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Step
|
|
6
|
+
class ResultBuilder
|
|
7
|
+
def initialize(contract_definition:, output_type:, output_schema:, model:, observers:)
|
|
8
|
+
@contract_definition = contract_definition
|
|
9
|
+
@output_type = output_type
|
|
10
|
+
@output_schema = output_schema
|
|
11
|
+
@model = model
|
|
12
|
+
@observers = observers
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def error_result(error_result:, messages:)
|
|
16
|
+
Result.new(
|
|
17
|
+
status: error_result.status,
|
|
18
|
+
raw_output: error_result.raw_output,
|
|
19
|
+
parsed_output: error_result.parsed_output,
|
|
20
|
+
validation_errors: error_result.validation_errors,
|
|
21
|
+
trace: Trace.new(messages: messages, model: @model)
|
|
22
|
+
)
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def success_result(response:, messages:, latency_ms:, input:)
|
|
26
|
+
raw_output = response.content
|
|
27
|
+
validation_result = validate_output(raw_output, input)
|
|
28
|
+
trace = Trace.new(messages: messages, model: @model, latency_ms: latency_ms, usage: response.usage)
|
|
29
|
+
|
|
30
|
+
Result.new(
|
|
31
|
+
status: validation_result[:status],
|
|
32
|
+
raw_output: raw_output,
|
|
33
|
+
parsed_output: validation_result[:parsed_output],
|
|
34
|
+
validation_errors: validation_result[:errors],
|
|
35
|
+
trace: trace,
|
|
36
|
+
observations: observations_for(validation_result, input)
|
|
37
|
+
)
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
private
|
|
41
|
+
|
|
42
|
+
def observations_for(validation_result, input)
|
|
43
|
+
return [] unless validation_result[:status] == :ok && @observers.any?
|
|
44
|
+
|
|
45
|
+
Validator.run_observations(@observers, validation_result[:parsed_output], input: input)
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def validate_output(raw_output, input)
|
|
49
|
+
Validator.validate(
|
|
50
|
+
raw_output: raw_output,
|
|
51
|
+
definition: @contract_definition,
|
|
52
|
+
output_type: @output_type,
|
|
53
|
+
input: input,
|
|
54
|
+
schema: @output_schema
|
|
55
|
+
)
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
@@ -32,6 +32,7 @@ module RubyLLM
|
|
|
32
32
|
Result.new(
|
|
33
33
|
status: last.status, raw_output: last.raw_output,
|
|
34
34
|
parsed_output: last.parsed_output, validation_errors: last.validation_errors,
|
|
35
|
+
observations: last.observations,
|
|
35
36
|
trace: last.trace.merge(
|
|
36
37
|
attempts: attempt_log, usage: aggregated_usage,
|
|
37
38
|
cost: total_cost, latency_ms: total_latency
|
|
@@ -9,27 +9,30 @@ module RubyLLM
|
|
|
9
9
|
def initialize(input_type:, output_type:, prompt_block:, contract_definition:,
|
|
10
10
|
adapter:, model:, output_schema: nil, max_output: nil,
|
|
11
11
|
max_input: nil, max_cost: nil, on_unknown_pricing: :refuse,
|
|
12
|
-
temperature: nil, extra_options: {})
|
|
13
|
-
@
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
12
|
+
temperature: nil, extra_options: {}, observers: [])
|
|
13
|
+
@config = RunnerConfig.new(
|
|
14
|
+
input_type: input_type,
|
|
15
|
+
output_type: output_type,
|
|
16
|
+
prompt_block: prompt_block,
|
|
17
|
+
contract_definition: contract_definition,
|
|
18
|
+
adapter: adapter,
|
|
19
|
+
model: model,
|
|
20
|
+
output_schema: output_schema,
|
|
21
|
+
max_output: max_output,
|
|
22
|
+
max_input: max_input,
|
|
23
|
+
max_cost: max_cost,
|
|
24
|
+
on_unknown_pricing: on_unknown_pricing,
|
|
25
|
+
temperature: temperature,
|
|
26
|
+
extra_options: extra_options,
|
|
27
|
+
observers: observers
|
|
28
|
+
)
|
|
26
29
|
end
|
|
27
30
|
|
|
28
31
|
def call(input)
|
|
29
|
-
validated_input =
|
|
32
|
+
validated_input = input_validator.call(input)
|
|
30
33
|
return validated_input if validated_input.is_a?(Result)
|
|
31
34
|
|
|
32
|
-
messages =
|
|
35
|
+
messages = prompt_compiler.call(input)
|
|
33
36
|
rescue RubyLLM::Contract::Error => e
|
|
34
37
|
Result.new(status: :input_error, raw_output: nil, parsed_output: nil,
|
|
35
38
|
validation_errors: [e.message])
|
|
@@ -43,94 +46,52 @@ module RubyLLM
|
|
|
43
46
|
limit_result = check_limits(messages)
|
|
44
47
|
return limit_result if limit_result
|
|
45
48
|
|
|
46
|
-
response, latency_ms =
|
|
47
|
-
return
|
|
49
|
+
response, latency_ms = adapter_caller.call(messages)
|
|
50
|
+
return result_builder.error_result(error_result: response, messages: messages) if response.is_a?(Result)
|
|
48
51
|
|
|
49
|
-
|
|
52
|
+
result_builder.success_result(response: response, messages: messages, latency_ms: latency_ms, input: input)
|
|
50
53
|
end
|
|
51
54
|
|
|
52
|
-
def
|
|
53
|
-
|
|
54
|
-
if type.is_a?(Class) && !type.respond_to?(:[])
|
|
55
|
-
raise TypeError, "#{input.inspect} is not a #{type}" unless input.is_a?(type)
|
|
56
|
-
else
|
|
57
|
-
type[input]
|
|
58
|
-
end
|
|
59
|
-
nil
|
|
60
|
-
rescue Dry::Types::CoercionError, TypeError, ArgumentError => e
|
|
61
|
-
Result.new(status: :input_error, raw_output: nil, parsed_output: nil, validation_errors: [e.message])
|
|
55
|
+
def input_validator
|
|
56
|
+
InputValidator.new(input_type: @config.input_type)
|
|
62
57
|
end
|
|
63
58
|
|
|
64
|
-
def
|
|
65
|
-
|
|
66
|
-
ast = Prompt::Builder.build(input: dynamic ? input : nil, &@prompt_block)
|
|
67
|
-
|
|
68
|
-
Prompt::Renderer.render(ast, variables: dynamic ? {} : template_variables_for(input))
|
|
69
|
-
rescue StandardError => e
|
|
70
|
-
raise RubyLLM::Contract::Error, "Prompt build failed: #{e.class}: #{e.message}"
|
|
59
|
+
def prompt_compiler
|
|
60
|
+
PromptCompiler.new(prompt_block: @config.prompt_block)
|
|
71
61
|
end
|
|
72
62
|
|
|
73
|
-
def
|
|
74
|
-
|
|
75
|
-
input.is_a?(Hash) ? base.merge(input.transform_keys(&:to_sym)) : base
|
|
63
|
+
def adapter_caller
|
|
64
|
+
AdapterCaller.new(adapter: @config.adapter, adapter_options: @config.adapter_options)
|
|
76
65
|
end
|
|
77
66
|
|
|
78
|
-
def
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
67
|
+
def result_builder
|
|
68
|
+
ResultBuilder.new(
|
|
69
|
+
contract_definition: @config.contract_definition,
|
|
70
|
+
output_type: @config.output_type,
|
|
71
|
+
output_schema: @config.output_schema,
|
|
72
|
+
model: @config.model,
|
|
73
|
+
observers: @config.observers
|
|
74
|
+
)
|
|
85
75
|
end
|
|
86
76
|
|
|
87
|
-
def
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
{ model: @model }.tap do |opts|
|
|
91
|
-
opts[:schema] = @output_schema if @output_schema
|
|
92
|
-
opts[:max_tokens] = effective_max_tokens if effective_max_tokens
|
|
93
|
-
opts[:temperature] = @temperature if @temperature
|
|
94
|
-
@extra_options.each { |k, v| opts[k] = v unless opts.key?(k) }
|
|
95
|
-
end
|
|
77
|
+
def max_input
|
|
78
|
+
@config.max_input
|
|
96
79
|
end
|
|
97
80
|
|
|
98
|
-
def
|
|
99
|
-
@
|
|
81
|
+
def max_cost
|
|
82
|
+
@config.max_cost
|
|
100
83
|
end
|
|
101
84
|
|
|
102
|
-
def
|
|
103
|
-
|
|
104
|
-
status: error_result.status,
|
|
105
|
-
raw_output: error_result.raw_output,
|
|
106
|
-
parsed_output: error_result.parsed_output,
|
|
107
|
-
validation_errors: error_result.validation_errors,
|
|
108
|
-
trace: Trace.new(messages: messages, model: @model)
|
|
109
|
-
)
|
|
85
|
+
def model_name
|
|
86
|
+
@config.model
|
|
110
87
|
end
|
|
111
88
|
|
|
112
|
-
def
|
|
113
|
-
|
|
114
|
-
validation_result = validate_output(raw_output, input)
|
|
115
|
-
trace = Trace.new(messages: messages, model: @model, latency_ms: latency_ms, usage: response.usage)
|
|
116
|
-
|
|
117
|
-
Result.new(
|
|
118
|
-
status: validation_result[:status],
|
|
119
|
-
raw_output: raw_output,
|
|
120
|
-
parsed_output: validation_result[:parsed_output],
|
|
121
|
-
validation_errors: validation_result[:errors],
|
|
122
|
-
trace: trace
|
|
123
|
-
)
|
|
89
|
+
def on_unknown_pricing
|
|
90
|
+
@config.on_unknown_pricing
|
|
124
91
|
end
|
|
125
92
|
|
|
126
|
-
def
|
|
127
|
-
|
|
128
|
-
raw_output: raw_output,
|
|
129
|
-
definition: @contract_definition,
|
|
130
|
-
output_type: @output_type,
|
|
131
|
-
input: input,
|
|
132
|
-
schema: @output_schema
|
|
133
|
-
)
|
|
93
|
+
def effective_max_output
|
|
94
|
+
@config.effective_max_output
|
|
134
95
|
end
|
|
135
96
|
end
|
|
136
97
|
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Step
|
|
6
|
+
RunnerConfig = Data.define(
|
|
7
|
+
:input_type,
|
|
8
|
+
:output_type,
|
|
9
|
+
:prompt_block,
|
|
10
|
+
:contract_definition,
|
|
11
|
+
:adapter,
|
|
12
|
+
:model,
|
|
13
|
+
:output_schema,
|
|
14
|
+
:max_output,
|
|
15
|
+
:max_input,
|
|
16
|
+
:max_cost,
|
|
17
|
+
:on_unknown_pricing,
|
|
18
|
+
:temperature,
|
|
19
|
+
:extra_options,
|
|
20
|
+
:observers
|
|
21
|
+
) do
|
|
22
|
+
def effective_max_output
|
|
23
|
+
extra_options[:max_tokens] || max_output
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def adapter_options
|
|
27
|
+
{ model: model }.tap do |options|
|
|
28
|
+
options[:schema] = output_schema if output_schema
|
|
29
|
+
options[:max_tokens] = effective_max_output if effective_max_output
|
|
30
|
+
options[:temperature] = temperature if temperature
|
|
31
|
+
extra_options.each { |key, value| options[key] = value unless options.key?(key) }
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
@@ -3,6 +3,11 @@
|
|
|
3
3
|
require_relative "step/trace"
|
|
4
4
|
require_relative "step/result"
|
|
5
5
|
require_relative "step/limit_checker"
|
|
6
|
+
require_relative "step/runner_config"
|
|
7
|
+
require_relative "step/input_validator"
|
|
8
|
+
require_relative "step/prompt_compiler"
|
|
9
|
+
require_relative "step/adapter_caller"
|
|
10
|
+
require_relative "step/result_builder"
|
|
6
11
|
require_relative "step/runner"
|
|
7
12
|
require_relative "step/retry_policy"
|
|
8
13
|
require_relative "step/retry_executor"
|