ruby_llm-contract 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +55 -0
- data/CHANGELOG.md +76 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +176 -0
- data/LICENSE +21 -0
- data/README.md +154 -0
- data/Rakefile +8 -0
- data/examples/00_basics.rb +500 -0
- data/examples/01_classify_threads.rb +220 -0
- data/examples/02_generate_comment.rb +203 -0
- data/examples/03_target_audience.rb +201 -0
- data/examples/04_real_llm.rb +410 -0
- data/examples/05_output_schema.rb +258 -0
- data/examples/07_keyword_extraction.rb +239 -0
- data/examples/08_translation.rb +353 -0
- data/examples/09_eval_dataset.rb +287 -0
- data/examples/10_reddit_full_showcase.rb +363 -0
- data/examples/README.md +140 -0
- data/lib/ruby_llm/contract/adapters/base.rb +13 -0
- data/lib/ruby_llm/contract/adapters/response.rb +17 -0
- data/lib/ruby_llm/contract/adapters/ruby_llm.rb +94 -0
- data/lib/ruby_llm/contract/adapters/test.rb +44 -0
- data/lib/ruby_llm/contract/adapters.rb +6 -0
- data/lib/ruby_llm/contract/concerns/deep_symbolize.rb +17 -0
- data/lib/ruby_llm/contract/concerns/eval_host.rb +109 -0
- data/lib/ruby_llm/contract/concerns/trace_equality.rb +15 -0
- data/lib/ruby_llm/contract/concerns/usage_aggregator.rb +43 -0
- data/lib/ruby_llm/contract/configuration.rb +21 -0
- data/lib/ruby_llm/contract/contract/definition.rb +39 -0
- data/lib/ruby_llm/contract/contract/invariant.rb +23 -0
- data/lib/ruby_llm/contract/contract/parser.rb +143 -0
- data/lib/ruby_llm/contract/contract/schema_validator.rb +239 -0
- data/lib/ruby_llm/contract/contract/validator.rb +104 -0
- data/lib/ruby_llm/contract/contract.rb +7 -0
- data/lib/ruby_llm/contract/cost_calculator.rb +38 -0
- data/lib/ruby_llm/contract/dsl.rb +13 -0
- data/lib/ruby_llm/contract/errors.rb +19 -0
- data/lib/ruby_llm/contract/eval/case_result.rb +76 -0
- data/lib/ruby_llm/contract/eval/contract_detail_builder.rb +47 -0
- data/lib/ruby_llm/contract/eval/dataset.rb +53 -0
- data/lib/ruby_llm/contract/eval/eval_definition.rb +112 -0
- data/lib/ruby_llm/contract/eval/evaluation_result.rb +27 -0
- data/lib/ruby_llm/contract/eval/evaluator/exact.rb +20 -0
- data/lib/ruby_llm/contract/eval/evaluator/json_includes.rb +58 -0
- data/lib/ruby_llm/contract/eval/evaluator/proc_evaluator.rb +40 -0
- data/lib/ruby_llm/contract/eval/evaluator/regex.rb +27 -0
- data/lib/ruby_llm/contract/eval/model_comparison.rb +80 -0
- data/lib/ruby_llm/contract/eval/pipeline_result_adapter.rb +15 -0
- data/lib/ruby_llm/contract/eval/report.rb +115 -0
- data/lib/ruby_llm/contract/eval/runner.rb +162 -0
- data/lib/ruby_llm/contract/eval/trait_evaluator.rb +75 -0
- data/lib/ruby_llm/contract/eval.rb +16 -0
- data/lib/ruby_llm/contract/pipeline/base.rb +62 -0
- data/lib/ruby_llm/contract/pipeline/result.rb +131 -0
- data/lib/ruby_llm/contract/pipeline/runner.rb +139 -0
- data/lib/ruby_llm/contract/pipeline/trace.rb +72 -0
- data/lib/ruby_llm/contract/pipeline.rb +6 -0
- data/lib/ruby_llm/contract/prompt/ast.rb +38 -0
- data/lib/ruby_llm/contract/prompt/builder.rb +47 -0
- data/lib/ruby_llm/contract/prompt/node.rb +25 -0
- data/lib/ruby_llm/contract/prompt/nodes/example_node.rb +27 -0
- data/lib/ruby_llm/contract/prompt/nodes/rule_node.rb +15 -0
- data/lib/ruby_llm/contract/prompt/nodes/section_node.rb +26 -0
- data/lib/ruby_llm/contract/prompt/nodes/system_node.rb +15 -0
- data/lib/ruby_llm/contract/prompt/nodes/user_node.rb +15 -0
- data/lib/ruby_llm/contract/prompt/nodes.rb +7 -0
- data/lib/ruby_llm/contract/prompt/renderer.rb +76 -0
- data/lib/ruby_llm/contract/railtie.rb +20 -0
- data/lib/ruby_llm/contract/rake_task.rb +78 -0
- data/lib/ruby_llm/contract/rspec/pass_eval.rb +96 -0
- data/lib/ruby_llm/contract/rspec/satisfy_contract.rb +31 -0
- data/lib/ruby_llm/contract/rspec.rb +6 -0
- data/lib/ruby_llm/contract/step/base.rb +138 -0
- data/lib/ruby_llm/contract/step/dsl.rb +144 -0
- data/lib/ruby_llm/contract/step/limit_checker.rb +64 -0
- data/lib/ruby_llm/contract/step/result.rb +38 -0
- data/lib/ruby_llm/contract/step/retry_executor.rb +90 -0
- data/lib/ruby_llm/contract/step/retry_policy.rb +76 -0
- data/lib/ruby_llm/contract/step/runner.rb +126 -0
- data/lib/ruby_llm/contract/step/trace.rb +70 -0
- data/lib/ruby_llm/contract/step.rb +10 -0
- data/lib/ruby_llm/contract/token_estimator.rb +19 -0
- data/lib/ruby_llm/contract/types.rb +11 -0
- data/lib/ruby_llm/contract/version.rb +7 -0
- data/lib/ruby_llm/contract.rb +108 -0
- data/ruby_llm-contract.gemspec +33 -0
- metadata +172 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Prompt
|
|
6
|
+
# Immutable, enumerable wrapper around the ordered list of prompt nodes.
class AST
  include Enumerable

  attr_reader :nodes

  # Keeps a frozen shallow copy of +nodes+ (the caller's array itself is not
  # frozen) and then freezes the AST instance.
  def initialize(nodes)
    @nodes = nodes.dup.freeze
    freeze
  end

  # Forwards the caller's block to the underlying node list, in order.
  def each(&)
    nodes.each(&)
  end

  # Number of nodes held.
  def size
    nodes.size
  end

  # Positional lookup, delegated to the node array.
  def [](index)
    nodes[index]
  end

  # Two ASTs are equal when +other+ is an instance of this class and the
  # node lists compare equal.
  def ==(other)
    return false unless other.is_a?(self.class)

    nodes == other.nodes
  end

  # Overrides Enumerable#to_a: returns each node's +to_h+ representation
  # rather than the node objects themselves.
  def to_a
    nodes.map(&:to_h)
  end
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Prompt
|
|
6
|
+
# Evaluates a prompt DSL block (system / rule / example / user / section)
# and collects the resulting nodes into an AST.
class Builder
  # One-shot convenience: wraps +block+ in a Builder and builds it.
  def self.build(input: nil, &block)
    new(block).build(input)
  end

  # @param block [Proc] the DSL block; it is evaluated with the builder as self
  def initialize(block)
    @block = block
    @nodes = []
  end

  # Runs the DSL block and returns a new AST of the nodes it declared.
  #
  # The node list is reset on every call, so building twice from the same
  # Builder yields two independent, equally sized ASTs instead of
  # accumulating nodes from the previous run.
  #
  # When the block declares at least one required parameter and +input+ is
  # not nil, +input+ is passed as the block argument. A +false+ input is
  # passed through as-is; only nil is treated as "no input supplied".
  def build(input = nil)
    @nodes = []

    if !input.nil? && @block.arity >= 1
      instance_exec(input, &@block)
    else
      instance_eval(&@block)
    end

    AST.new(@nodes)
  end

  # --- DSL methods: each appends one node to the list being built ---

  def system(text)
    @nodes << Nodes::SystemNode.new(text)
  end

  def rule(text)
    @nodes << Nodes::RuleNode.new(text)
  end

  def example(input:, output:)
    @nodes << Nodes::ExampleNode.new(input: input, output: output)
  end

  def user(text)
    @nodes << Nodes::UserNode.new(text)
  end

  def section(name, text)
    @nodes << Nodes::SectionNode.new(name, text)
  end
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Prompt
|
|
6
|
+
# Base class for prompt nodes: a frozen (type, content) pair.
class Node
  attr_reader :type, :content

  # Freezes +type+ and +content+ in place (the objects passed in are frozen,
  # not copied) and then freezes the node itself.
  def initialize(type:, content:)
    @type = type.freeze
    @content = content.freeze
    freeze
  end

  # Equal when +other+ is an instance of the receiver's class and both the
  # type and the content compare equal.
  def ==(other)
    return false unless other.is_a?(self.class)
    return false unless type == other.type

    content == other.content
  end

  # Plain-hash representation of the node.
  def to_h
    { type: @type, content: @content }
  end
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Prompt
|
|
6
|
+
module Nodes
|
|
7
|
+
# Prompt node holding one example exchange (an input and its output).
class ExampleNode < Node
  attr_reader :input, :output

  # Both values are frozen in place. The instance variables are assigned
  # before calling super because Node#initialize freezes the instance.
  def initialize(input:, output:)
    @input = input.freeze
    @output = output.freeze
    super(type: :example, content: nil)
  end

  # Equal when +other+ is an instance of the receiver's class and type,
  # input and output all compare equal.
  def ==(other)
    return false unless other.is_a?(self.class)
    return false unless type == other.type
    return false unless input == other.input

    output == other.output
  end

  # Plain-hash representation; carries input/output instead of content.
  def to_h
    { type: :example, input: @input, output: @output }
  end
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Prompt
|
|
6
|
+
module Nodes
|
|
7
|
+
# Prompt node for a named section of content.
class SectionNode < Node
  attr_reader :name

  # The name is frozen in place and assigned before calling super because
  # Node#initialize freezes the instance.
  def initialize(name, content)
    @name = name.freeze
    super(type: :section, content: content)
  end

  # Equal when +other+ is an instance of the receiver's class and type,
  # name and content all compare equal.
  def ==(other)
    return false unless other.is_a?(self.class)
    return false unless type == other.type
    return false unless name == other.name

    content == other.content
  end

  # Plain-hash representation including the section name.
  def to_h
    { type: :section, name: @name, content: @content }
  end
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module RubyLLM
|
|
6
|
+
module Contract
|
|
7
|
+
module Prompt
|
|
8
|
+
# Turns a prompt AST into a list of chat messages
# (+{ role:, content: }+ hashes), substituting +{name}+ placeholders
# from the supplied variables.
class Renderer
  # Class-level shortcut for +new.render+.
  def self.render(ast, variables: {})
    new.render(ast, variables: variables)
  end

  # @param ast [#each_with_object] nodes to render, in order
  # @param variables [Hash] placeholder values, looked up by Symbol key
  # @return [Array<Hash>] rendered messages; blank messages are omitted
  def render(ast, variables: {})
    ast.each_with_object([]) do |node, messages|
      render_node(node, variables, messages)
    end
  end

  private

  # Dispatches on node class. Nodes of any other class produce no message.
  def render_node(node, variables, messages)
    case node
    when Nodes::SystemNode, Nodes::RuleNode
      append_message(messages, :system, node.content, variables)
    when Nodes::ExampleNode
      # An example becomes a user/assistant message pair.
      append_message(messages, :user, node.input, variables)
      append_message(messages, :assistant, node.output, variables)
    when Nodes::UserNode
      append_message(messages, :user, node.content, variables)
    when Nodes::SectionNode
      render_section_node(node, variables, messages)
    end
  end

  # Interpolates +raw_content+ and appends it unless the result is blank.
  def append_message(messages, role, raw_content, variables)
    content = interpolate(raw_content, variables)
    messages << { role: role, content: content } if content_present?(content)
  end

  # Renders a section as a system message of the form "[name]\nbody".
  # Hash/Array content is serialized to JSON first. The blank check runs on
  # the interpolated body, matching append_message, so a section whose
  # placeholders resolve to nothing is skipped rather than emitted as an
  # empty "[name]" header.
  def render_section_node(node, variables, messages)
    raw = node.content
    raw = raw.to_json if raw.is_a?(Hash) || raw.is_a?(Array)

    body = interpolate(raw, variables)
    return unless content_present?(body)

    safe_name = sanitize_section_name(node.name)
    messages << { role: :system, content: "[#{safe_name}]\n#{body}" }
  end

  # True unless the content is nil, empty or whitespace-only.
  def content_present?(content)
    content.to_s.strip != ""
  end

  # Replaces brackets and line breaks with spaces so the name cannot break
  # out of the "[name]" header line.
  def sanitize_section_name(name)
    name.to_s.gsub(/[\[\]\n\r]/, " ").strip
  end

  # Substitutes "{key}" placeholders with the matching variable.
  # nil passes through, Hash/Array text is returned as JSON without
  # substitution, and placeholders with no matching variable are left as-is.
  def interpolate(text, variables)
    return text if text.nil?
    return text.to_json if text.is_a?(Hash) || text.is_a?(Array)

    # Coerce non-String content (Integer, Symbol, etc.) to String before gsub
    text = text.to_s unless text.is_a?(String)

    text.gsub(/\{(\w+)\}/) do |match|
      key = ::Regexp.last_match(1).to_sym
      variables.key?(key) ? serialize_value(variables[key]) : match
    end
  end

  # Hash/Array values are embedded as JSON; everything else via to_s.
  def serialize_value(value)
    value.is_a?(Hash) || value.is_a?(Array) ? value.to_json : value.to_s
  end
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
# Rails integration: loads eval definition files at boot and on reload.
class Railtie < ::Rails::Railtie
  # Eval files (e.g. classify_threads_eval.rb) don't define Zeitwerk-compatible
  # constants — they call define_eval on an existing Step class, so they are
  # loaded explicitly instead of being autoloaded.
  #
  # The same loader is registered on two hooks, in this order:
  #   after_initialize - once the application has finished initializing
  #   to_prepare       - on code reload in development (Spring, zeitwerk:check, etc.)
  %i[after_initialize to_prepare].each do |hook|
    config.public_send(hook) { RubyLLM::Contract.load_evals! }
  end
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rake"
|
|
4
|
+
require "rake/tasklib"
|
|
5
|
+
|
|
6
|
+
module RubyLLM
|
|
7
|
+
module Contract
|
|
8
|
+
# Rake task library that defines a task running every registered eval and
# aborting when the score gate or the cost budget is not met.
class RakeTask < ::Rake::TaskLib
  attr_accessor :name, :context, :fail_on_empty, :minimum_score, :maximum_cost, :eval_dirs

  # Sets the defaults, yields the instance to the optional configuration
  # block, then defines the Rake task.
  def initialize(name = :"ruby_llm_contract:eval", &block)
    super()
    @name = name
    @context = {}
    @fail_on_empty = true
    @minimum_score = nil # nil = require 100%; float = threshold
    @maximum_cost = nil  # nil = no cost limit; float = budget cap (suite-level)
    @eval_dirs = []      # directories to load eval files from (non-Rails)
    block.call(self) if block
    define_task
  end

  private

  # Registers the task. The body runs when the task is invoked, not here.
  def define_task
    desc "Run all ruby_llm-contract evals"
    task(@name => task_prerequisites) do
      require "ruby_llm/contract"
      load_eval_files

      results = RubyLLM::Contract.run_all_evals(context: @context)

      if results.empty?
        abort "No evals defined. Define evals with define_eval or set fail_on_empty = false." if @fail_on_empty

        puts "No evals defined."
        next
      end

      suite_cost, gate_passed = print_reports(results)
      enforce_budget(suite_cost)

      abort "\nEval suite FAILED" unless gate_passed
      puts "\nAll evals passed."
    end
  end

  # Loads eval files from each configured directory, then calls the loader
  # once more without an argument.
  def load_eval_files
    @eval_dirs.each { |dir| RubyLLM::Contract.load_evals!(dir) }
    RubyLLM::Contract.load_evals!
  end

  # Prints every report grouped by host and returns
  # [total cost across reports, whether every report met the score gate].
  def print_reports(results)
    total_cost = 0.0
    all_met = true

    results.each do |host, reports|
      puts "\n#{host.name || host.to_s}"
      reports.each_value do |report|
        report.print_summary
        total_cost += report.total_cost
        all_met = false unless report_meets_score?(report)
      end
    end

    [total_cost, all_met]
  end

  # Aborts when a budget is configured and the suite cost exceeds it.
  def enforce_budget(suite_cost)
    return unless @maximum_cost && suite_cost > @maximum_cost

    abort "\nEval suite FAILED: total cost $#{format("%.4f", suite_cost)} " \
          "exceeds budget $#{format("%.4f", @maximum_cost)}"
  end

  # With a minimum score set, compares the report score against it;
  # otherwise defers to the report's own passed? verdict.
  def report_meets_score?(report)
    return report.passed? unless @minimum_score

    report.score >= @minimum_score
  end

  # Depends on :environment only when such a task is already defined at the
  # time this task is being defined.
  def task_prerequisites
    return [:environment] if Rake::Task.task_defined?(:environment)

    []
  end
end
|
|
77
|
+
end
|
|
78
|
+
end
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module RSpec
|
|
6
|
+
# Helper methods for the pass_eval matcher to keep the block short.
|
|
7
|
+
# Message-formatting helpers mixed into the pass_eval matcher.
module PassEvalHelpers
  # Chooses between the error message (when the eval raised) and the
  # report-based message.
  def format_failure_message(eval_name, error, report, minimum_score, maximum_cost)
    if error
      format_error_message(eval_name, error)
    else
      format_report_message(eval_name, report, minimum_score, maximum_cost)
    end
  end

  # Message for an eval that raised instead of producing a report.
  def format_error_message(eval_name, error)
    "expected #{eval_name} eval to pass, but it raised an error:\n #{error.class}: #{error.message}"
  end

  # Header line, a blank line, then one line per case result (plus a
  # details line for failed cases that carry details).
  def format_report_message(eval_name, report, minimum_score, maximum_cost)
    header = build_header(eval_name, report, minimum_score, maximum_cost)

    case_lines = report.results.flat_map do |result|
      cost_str = result.cost ? " $#{format("%.4f", result.cost)}" : ""
      entry = [" #{result.label} #{result.name} (score: #{result.score})#{cost_str}"]
      entry << " #{result.details}" if result.details && result.failed?
      entry
    end

    (header + [""] + case_lines).join("\n")
  end

  private

  # Builds the one-element header array. A blown cost budget takes
  # precedence over the score threshold, which takes precedence over the
  # plain pass/fail wording.
  def build_header(eval_name, report, minimum_score, maximum_cost)
    cost_str = report.total_cost.positive? ? ", cost: $#{format("%.4f", report.total_cost)}" : ""

    headline =
      if maximum_cost && report.total_cost > maximum_cost
        "expected #{eval_name} eval cost <= $#{format("%.4f", maximum_cost)}, " \
          "but got: $#{format("%.4f", report.total_cost)} (#{report.pass_rate})"
      elsif minimum_score
        "expected #{eval_name} eval score >= #{minimum_score}, " \
          "but got: #{report.score.round(2)} (#{report.pass_rate}#{cost_str})"
      else
        "expected #{eval_name} eval to pass, " \
          "but got score: #{report.score.round(2)} (#{report.pass_rate}#{cost_str})"
      end

    [headline]
  end
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
# RSpec matcher: expect(step_or_pipeline).to pass_eval(:name)
# Optional chains: with_context, with_minimum_score, with_maximum_cost.
RSpec::Matchers.define :pass_eval do |eval_name|
  include RubyLLM::Contract::RSpec::PassEvalHelpers

  chain(:with_context) { |ctx| @context = ctx }
  chain(:with_minimum_score) { |score| @minimum_score = score }
  chain(:with_maximum_cost) { |cost| @maximum_cost = cost }

  match do |step_or_pipeline|
    @eval_name = eval_name
    # Make sure every option ivar exists even when its chain was not used.
    @context ||= {}
    @minimum_score ||= nil
    @maximum_cost ||= nil
    @error = nil
    @report = step_or_pipeline.run_eval(eval_name, context: @context)

    # With a minimum score the report's score is compared against it;
    # otherwise the report's own passed? verdict decides.
    score_ok = @minimum_score ? @report.score >= @minimum_score : @report.passed?
    # The budget applies only when a maximum cost was given.
    cost_ok = !@maximum_cost || @report.total_cost <= @maximum_cost

    score_ok && cost_ok
  rescue StandardError => e
    # Any error raised above is kept for the failure message and counts as
    # "did not match".
    @error = e
    false
  end

  failure_message do
    format_failure_message(@eval_name, @error, @report, @minimum_score, @maximum_cost)
  end

  failure_message_when_negated do
    "expected #{@eval_name} eval NOT to pass, but it passed with score: #{@report.score.round(2)}"
  end
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# RSpec matcher: expect(result).to satisfy_contract
# Matches when the object responds to ok? and ok? is truthy.
RSpec::Matchers.define :satisfy_contract do
  match do |result|
    @result = result
    result.respond_to?(:ok?) && result.ok?
  end

  failure_message do
    # The match block accepts any object, so the message must not assume a
    # step result: an object without #status (e.g. nil) gets a descriptive
    # message instead of raising NoMethodError while building the failure.
    headline =
      if @result.respond_to?(:status)
        "expected step result to satisfy contract, but got status: #{@result.status}"
      else
        "expected step result to satisfy contract, but got #{@result.inspect} " \
          "(does not respond to #status)"
      end
    lines = [headline]

    # Array() tolerates a nil validation_errors value.
    errors = @result.respond_to?(:validation_errors) ? Array(@result.validation_errors) : []
    if errors.any?
      lines << ""
      lines << "Validation errors:"
      errors.each { |e| lines << " - #{e}" }
    end

    if @result.respond_to?(:raw_output) && @result.raw_output
      output = @result.raw_output.to_s
      # Keep the message readable: show at most the first 200 characters.
      output = "#{output[0, 200]}..." if output.size > 200
      lines << ""
      lines << "Raw output: #{output}"
    end

    lines.join("\n")
  end

  failure_message_when_negated do
    "expected step result NOT to satisfy contract, but it passed with status: :ok"
  end
end
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Step
|
|
6
|
+
# Base class for contract steps. All behaviour lives on the class itself:
# steps are run through class-level methods such as run and eval_case.
class Base
  # Registers a new subclass as an eval host when the class being inherited
  # from responds to eval_defined? and reports evals as defined.
  def self.inherited(subclass)
    super
    return unless respond_to?(:eval_defined?) && eval_defined?

    Contract.register_eval_host(subclass)
  end

  class << self
    include Concerns::EvalHost
    include RetryExecutor
    include Dsl

    # Runs one ad-hoc case through the eval runner and returns the first
    # (only) case result from the resulting report.
    def eval_case(input:, expected: nil, expected_traits: nil, evaluator: nil, context: {})
      single = Eval::Dataset.define("single_case") do
        add_case("inline", input: input, expected: expected,
                           expected_traits: expected_traits, evaluator: evaluator)
      end

      Eval::Runner.run(step: self, dataset: single, context: context).results.first
    end

    # Estimates the cost of one run without calling the model.
    # Returns nil when the cost calculator has no entry for the model;
    # otherwise a hash with the model, token counts and estimated cost.
    def estimate_cost(input:, model: nil)
      model_name = model || RubyLLM::Contract.configuration.default_model
      prompt_tokens = TokenEstimator.estimate(build_messages(input))
      completion_tokens = max_output || 256 # conservative default

      # find_model / compute_cost are invoked through send, as in the
      # original code.
      pricing = CostCalculator.send(:find_model, model_name)
      return nil unless pricing

      cost = CostCalculator.send(:compute_cost, pricing,
                                 { input_tokens: prompt_tokens, output_tokens: completion_tokens })

      {
        model: model_name,
        input_tokens: prompt_tokens,
        output_tokens_estimate: completion_tokens,
        estimated_cost: cost
      }
    end

    # Estimates the total cost of every case in the named eval, per model.
    # Cases whose cost cannot be estimated count as 0.0.
    # Raises ArgumentError when no eval with that name is defined.
    def estimate_eval_cost(eval_name, models: nil)
      definition = send(:all_eval_definitions)[eval_name.to_s]
      raise ArgumentError, "No eval '#{eval_name}' defined" unless definition

      candidates = models || [RubyLLM::Contract.configuration.default_model].compact
      cases = definition.build_dataset.cases

      candidates.each_with_object({}) do |model_name, totals|
        total = cases.sum do |kase|
          estimate = estimate_cost(input: kase.input, model: model_name)
          if estimate
            estimate[:estimated_cost]
          else
            0.0
          end
        end
        totals[model_name] = total.round(6)
      end
    end

    KNOWN_CONTEXT_KEYS = %i[adapter model temperature max_tokens schema provider assume_model_exists].freeze

    # Executes the step for +input+. Uses the retry path when a retry policy
    # is configured, otherwise performs a single run.
    def run(input, context: {})
      warn_unknown_context_keys(context)
      adapter = resolve_adapter(context)
      default_model = context[:model] || RubyLLM::Contract.configuration.default_model
      policy = retry_policy

      return run_once(input, adapter: adapter, model: default_model) unless policy

      run_with_retry(input, adapter: adapter, default_model: default_model, policy: policy)
    end

    private

    # Emits a warning listing any context keys outside KNOWN_CONTEXT_KEYS.
    def warn_unknown_context_keys(context)
      unknown = context.keys - KNOWN_CONTEXT_KEYS
      return if unknown.empty?

      warn "[ruby_llm-contract] Unknown context keys: #{unknown.inspect}. " \
           "Known keys: #{KNOWN_CONTEXT_KEYS.inspect}"
    end

    # Picks the adapter from the context, falling back to the configured
    # default; raises when neither is set.
    def resolve_adapter(context)
      adapter = context[:adapter] || RubyLLM::Contract.configuration.default_adapter
      unless adapter
        raise RubyLLM::Contract::Error, "No adapter configured. Set one with RubyLLM::Contract.configure " \
                                        "{ |c| c.default_adapter = ... } or pass context: { adapter: ... }"
      end

      adapter
    end

    # Single attempt. An ArgumentError raised while building or calling the
    # runner is converted into an :input_error result.
    def run_once(input, adapter:, model:)
      runner = Runner.new(
        input_type: input_type, output_type: output_type,
        prompt_block: prompt, contract_definition: effective_contract,
        adapter: adapter, model: model, output_schema: output_schema,
        max_output: max_output, max_input: max_input, max_cost: max_cost
      )
      runner.call(input)
    rescue ArgumentError => e
      Result.new(status: :input_error, raw_output: nil, parsed_output: nil,
                 validation_errors: [e.message])
    end

    # The contract actually enforced: the declared contract merged with the
    # class-level validations, plus an inferred :json parse strategy when
    # the output type looks like a Hash/Array and no contract was set
    # directly on this class.
    def effective_contract
      base = contract
      extra = class_validates
      inferred_parse = json_compatible_type?(output_type) ? :json : nil

      return base if extra.empty? && inferred_parse.nil?

      has_own_contract = defined?(@contract_definition) && @contract_definition
      parse_override = has_own_contract ? nil : inferred_parse

      Definition.merge(base, extra_invariants: extra, parse_override: parse_override)
    end

    # Builds and renders the prompt for +input+. A prompt block that takes an
    # argument receives the input directly and gets no template variables;
    # otherwise the input is exposed as {input}, and a Hash input also
    # contributes its (symbolized) keys as variables.
    def build_messages(input)
      if prompt.arity >= 1
        ast = Prompt::Builder.build(input: input, &prompt)
        variables = {}
      else
        ast = Prompt::Builder.build(input: nil, &prompt)
        variables = { input: input }
        variables.merge!(input.transform_keys(&:to_sym)) if input.is_a?(Hash)
      end

      Prompt::Renderer.render(ast, variables: variables)
    end

    # True for the Hash/Array types (contract or core) and for any type
    # whose name mentions Hash or Array.
    def json_compatible_type?(type)
      return true if type == RubyLLM::Contract::Types::Hash || type == Hash
      return true if type == RubyLLM::Contract::Types::Array || type == Array

      type.respond_to?(:name) && type.name&.match?(/Hash|Array/)
    end
  end
end
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
end
|