ruby_llm-contract 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +55 -0
  4. data/CHANGELOG.md +76 -0
  5. data/Gemfile +11 -0
  6. data/Gemfile.lock +176 -0
  7. data/LICENSE +21 -0
  8. data/README.md +154 -0
  9. data/Rakefile +8 -0
  10. data/examples/00_basics.rb +500 -0
  11. data/examples/01_classify_threads.rb +220 -0
  12. data/examples/02_generate_comment.rb +203 -0
  13. data/examples/03_target_audience.rb +201 -0
  14. data/examples/04_real_llm.rb +410 -0
  15. data/examples/05_output_schema.rb +258 -0
  16. data/examples/07_keyword_extraction.rb +239 -0
  17. data/examples/08_translation.rb +353 -0
  18. data/examples/09_eval_dataset.rb +287 -0
  19. data/examples/10_reddit_full_showcase.rb +363 -0
  20. data/examples/README.md +140 -0
  21. data/lib/ruby_llm/contract/adapters/base.rb +13 -0
  22. data/lib/ruby_llm/contract/adapters/response.rb +17 -0
  23. data/lib/ruby_llm/contract/adapters/ruby_llm.rb +94 -0
  24. data/lib/ruby_llm/contract/adapters/test.rb +44 -0
  25. data/lib/ruby_llm/contract/adapters.rb +6 -0
  26. data/lib/ruby_llm/contract/concerns/deep_symbolize.rb +17 -0
  27. data/lib/ruby_llm/contract/concerns/eval_host.rb +109 -0
  28. data/lib/ruby_llm/contract/concerns/trace_equality.rb +15 -0
  29. data/lib/ruby_llm/contract/concerns/usage_aggregator.rb +43 -0
  30. data/lib/ruby_llm/contract/configuration.rb +21 -0
  31. data/lib/ruby_llm/contract/contract/definition.rb +39 -0
  32. data/lib/ruby_llm/contract/contract/invariant.rb +23 -0
  33. data/lib/ruby_llm/contract/contract/parser.rb +143 -0
  34. data/lib/ruby_llm/contract/contract/schema_validator.rb +239 -0
  35. data/lib/ruby_llm/contract/contract/validator.rb +104 -0
  36. data/lib/ruby_llm/contract/contract.rb +7 -0
  37. data/lib/ruby_llm/contract/cost_calculator.rb +38 -0
  38. data/lib/ruby_llm/contract/dsl.rb +13 -0
  39. data/lib/ruby_llm/contract/errors.rb +19 -0
  40. data/lib/ruby_llm/contract/eval/case_result.rb +76 -0
  41. data/lib/ruby_llm/contract/eval/contract_detail_builder.rb +47 -0
  42. data/lib/ruby_llm/contract/eval/dataset.rb +53 -0
  43. data/lib/ruby_llm/contract/eval/eval_definition.rb +112 -0
  44. data/lib/ruby_llm/contract/eval/evaluation_result.rb +27 -0
  45. data/lib/ruby_llm/contract/eval/evaluator/exact.rb +20 -0
  46. data/lib/ruby_llm/contract/eval/evaluator/json_includes.rb +58 -0
  47. data/lib/ruby_llm/contract/eval/evaluator/proc_evaluator.rb +40 -0
  48. data/lib/ruby_llm/contract/eval/evaluator/regex.rb +27 -0
  49. data/lib/ruby_llm/contract/eval/model_comparison.rb +80 -0
  50. data/lib/ruby_llm/contract/eval/pipeline_result_adapter.rb +15 -0
  51. data/lib/ruby_llm/contract/eval/report.rb +115 -0
  52. data/lib/ruby_llm/contract/eval/runner.rb +162 -0
  53. data/lib/ruby_llm/contract/eval/trait_evaluator.rb +75 -0
  54. data/lib/ruby_llm/contract/eval.rb +16 -0
  55. data/lib/ruby_llm/contract/pipeline/base.rb +62 -0
  56. data/lib/ruby_llm/contract/pipeline/result.rb +131 -0
  57. data/lib/ruby_llm/contract/pipeline/runner.rb +139 -0
  58. data/lib/ruby_llm/contract/pipeline/trace.rb +72 -0
  59. data/lib/ruby_llm/contract/pipeline.rb +6 -0
  60. data/lib/ruby_llm/contract/prompt/ast.rb +38 -0
  61. data/lib/ruby_llm/contract/prompt/builder.rb +47 -0
  62. data/lib/ruby_llm/contract/prompt/node.rb +25 -0
  63. data/lib/ruby_llm/contract/prompt/nodes/example_node.rb +27 -0
  64. data/lib/ruby_llm/contract/prompt/nodes/rule_node.rb +15 -0
  65. data/lib/ruby_llm/contract/prompt/nodes/section_node.rb +26 -0
  66. data/lib/ruby_llm/contract/prompt/nodes/system_node.rb +15 -0
  67. data/lib/ruby_llm/contract/prompt/nodes/user_node.rb +15 -0
  68. data/lib/ruby_llm/contract/prompt/nodes.rb +7 -0
  69. data/lib/ruby_llm/contract/prompt/renderer.rb +76 -0
  70. data/lib/ruby_llm/contract/railtie.rb +20 -0
  71. data/lib/ruby_llm/contract/rake_task.rb +78 -0
  72. data/lib/ruby_llm/contract/rspec/pass_eval.rb +96 -0
  73. data/lib/ruby_llm/contract/rspec/satisfy_contract.rb +31 -0
  74. data/lib/ruby_llm/contract/rspec.rb +6 -0
  75. data/lib/ruby_llm/contract/step/base.rb +138 -0
  76. data/lib/ruby_llm/contract/step/dsl.rb +144 -0
  77. data/lib/ruby_llm/contract/step/limit_checker.rb +64 -0
  78. data/lib/ruby_llm/contract/step/result.rb +38 -0
  79. data/lib/ruby_llm/contract/step/retry_executor.rb +90 -0
  80. data/lib/ruby_llm/contract/step/retry_policy.rb +76 -0
  81. data/lib/ruby_llm/contract/step/runner.rb +126 -0
  82. data/lib/ruby_llm/contract/step/trace.rb +70 -0
  83. data/lib/ruby_llm/contract/step.rb +10 -0
  84. data/lib/ruby_llm/contract/token_estimator.rb +19 -0
  85. data/lib/ruby_llm/contract/types.rb +11 -0
  86. data/lib/ruby_llm/contract/version.rb +7 -0
  87. data/lib/ruby_llm/contract.rb +108 -0
  88. data/ruby_llm-contract.gemspec +33 -0
  89. metadata +172 -0
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Contract
    module Prompt
      # Immutable, ordered collection of prompt nodes.
      #
      # The node list is shallow-copied and frozen at construction and the AST
      # itself is frozen. Enumerable is layered on top of #each.
      class AST
        include Enumerable

        # @return [Array] frozen copy of the nodes passed to the constructor
        attr_reader :nodes

        # @param nodes [Array] nodes in prompt order; the array is duplicated,
        #   so later changes to the caller's array do not leak into this AST
        def initialize(nodes)
          @nodes = nodes.dup.freeze
          freeze
        end

        # Yields every node in order; this is what Enumerable builds on.
        def each(&)
          @nodes.each(&)
        end

        # @return [Integer] number of nodes
        def size
          @nodes.size
        end

        # @param index [Integer, Range] anything Array#[] accepts
        # @return [Object, nil] the node(s) at +index+
        def [](index)
          @nodes[index]
        end

        # Two ASTs are equal when their node lists are equal (Array#==).
        def ==(other)
          other.is_a?(self.class) && nodes == other.nodes
        end

        # Hash-key equality. Defined together with #hash so that equal ASTs
        # collapse under Array#uniq and address the same Hash entry.
        def eql?(other)
          other.is_a?(self.class) && nodes.eql?(other.nodes)
        end

        # @return [Integer] hash derived from the node list, consistent with #eql?
        def hash
          nodes.hash
        end

        # NOTE: deliberately shadows Enumerable#to_a — the result is the
        # +to_h+ form of every node, not the node objects themselves.
        #
        # @return [Array<Hash>]
        def to_a
          @nodes.map(&:to_h)
        end
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Contract
    module Prompt
      # Evaluates a prompt DSL block and collects the nodes it declares
      # (+system+, +rule+, +example+, +user+, +section+) into an AST.
      class Builder
        # Convenience entry point: build an AST from a block in one call.
        #
        # @param input [Object, nil] value handed to a block that takes a parameter
        # @return [AST]
        def self.build(input: nil, &block)
          new(block).build(input)
        end

        # @param block [Proc] the prompt definition to evaluate
        def initialize(block)
          @block = block
          @nodes = []
        end

        # Runs the block against this builder and returns the resulting AST.
        #
        # The node buffer is reset first, so calling #build more than once on
        # the same builder yields the same AST instead of accumulating the
        # nodes of every earlier call.
        #
        # When +input+ is truthy and the block declares at least one required
        # parameter, the block is instance_exec'd with +input+. Otherwise it is
        # instance_eval'd; note that instance_eval passes the receiver as the
        # block argument, so a parameterised block called with a nil/false
        # input sees this builder as its parameter.
        #
        # @param input [Object, nil]
        # @return [AST]
        def build(input = nil)
          @nodes = []
          if input && @block.arity >= 1
            instance_exec(input, &@block)
          else
            instance_eval(&@block)
          end
          AST.new(@nodes)
        end

        # Appends a system node. Shadows Kernel#system inside prompt blocks.
        def system(text)
          @nodes << Nodes::SystemNode.new(text)
        end

        # Appends a rule node.
        def rule(text)
          @nodes << Nodes::RuleNode.new(text)
        end

        # Appends an example node made of an input/output pair.
        def example(input:, output:)
          @nodes << Nodes::ExampleNode.new(input: input, output: output)
        end

        # Appends a user node.
        def user(text)
          @nodes << Nodes::UserNode.new(text)
        end

        # Appends a named section node.
        def section(name, text)
          @nodes << Nodes::SectionNode.new(name, text)
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Contract
    module Prompt
      # Base class for prompt AST nodes: a frozen (type, content) pair.
      class Node
        attr_reader :type, :content

        # Both arguments are frozen in place (no copy is made) and the node
        # itself is frozen afterwards.
        #
        # @param type [Symbol] node kind
        # @param content [Object] node payload
        def initialize(type:, content:)
          @type = type.freeze
          @content = content.freeze
          freeze
        end

        # Value equality on type and content; +other+ must be an instance of
        # the receiver's class (or a subclass of it).
        def ==(other)
          other.is_a?(self.class) && type == other.type && content == other.content
        end

        # Hash-key equality: #== narrowed to operands whose hashes agree, so
        # the eql?/hash contract holds even where == is looser than eql?
        # (e.g. content 1 vs 1.0). Dispatches through #==, so subclasses that
        # override #== are honoured.
        def eql?(other)
          self == other && hash == other.hash
        end

        # @return [Integer] hash derived from type and content
        def hash
          [type, content].hash
        end

        # @return [Hash] +{ type:, content: }+
        def to_h
          { type: @type, content: @content }
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Contract
    module Prompt
      module Nodes
        # Few-shot example: an input/output pair. The node's +content+ is
        # always nil; the payload lives in +input+ and +output+.
        class ExampleNode < Node
          attr_reader :input, :output

          # Both values are frozen in place. They are assigned before +super+
          # because the base initializer freezes the node.
          #
          # @param input [Object] example input
          # @param output [Object] expected output for that input
          def initialize(input:, output:)
            @input = input.freeze
            @output = output.freeze
            super(type: :example, content: nil)
          end

          # Value equality on type, input and output.
          def ==(other)
            other.is_a?(self.class) && type == other.type && input == other.input && output == other.output
          end

          # Hash-key equality, kept consistent with #hash so example nodes
          # behave as Hash keys and under Array#uniq.
          def eql?(other)
            self == other && hash == other.hash
          end

          # @return [Integer] hash derived from type, input and output
          def hash
            [type, input, output].hash
          end

          # @return [Hash] +{ type: :example, input:, output: }+
          def to_h
            { type: :example, input: @input, output: @output }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Contract
    module Prompt
      module Nodes
        # Prompt node tagged +:rule+.
        class RuleNode < Node
          # @param content [Object] rule payload, stored by the base class
          def initialize(content)
            super(type: :rule, content:)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Contract
    module Prompt
      module Nodes
        # Named block of content, tagged +:section+.
        class SectionNode < Node
          attr_reader :name

          # +name+ is frozen in place and assigned before +super+ because the
          # base initializer freezes the node.
          #
          # @param name [Object] section label
          # @param content [Object] section payload
          def initialize(name, content)
            @name = name.freeze
            super(type: :section, content: content)
          end

          # Value equality on type, name and content.
          def ==(other)
            other.is_a?(self.class) && type == other.type && name == other.name && content == other.content
          end

          # Hash-key equality, kept consistent with #hash so section nodes
          # behave as Hash keys and under Array#uniq.
          def eql?(other)
            self == other && hash == other.hash
          end

          # @return [Integer] hash derived from type, name and content
          def hash
            [type, name, content].hash
          end

          # @return [Hash] +{ type: :section, name:, content: }+
          def to_h
            { type: :section, name: @name, content: @content }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Contract
    module Prompt
      module Nodes
        # Prompt node tagged +:system+.
        class SystemNode < Node
          # @param content [Object] system payload, stored by the base class
          def initialize(content)
            super(type: :system, content:)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Contract
    module Prompt
      module Nodes
        # Prompt node tagged +:user+.
        class UserNode < Node
          # @param content [Object] user payload, stored by the base class
          def initialize(content)
            super(type: :user, content:)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "nodes/system_node"
4
+ require_relative "nodes/rule_node"
5
+ require_relative "nodes/example_node"
6
+ require_relative "nodes/user_node"
7
+ require_relative "nodes/section_node"
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
module RubyLLM
  module Contract
    module Prompt
      # Turns a prompt AST into an array of +{ role:, content: }+ message
      # hashes, substituting +{name}+ placeholders from +variables+.
      class Renderer
        # A placeholder is a run of word characters wrapped in braces.
        PLACEHOLDER = /\{(\w+)\}/
        private_constant :PLACEHOLDER

        # Class-level shortcut for +new.render+.
        def self.render(ast, variables: {})
          new.render(ast, variables: variables)
        end

        # @param ast [#each_with_object] nodes in prompt order
        # @param variables [Hash] placeholder values; keys may be Symbols or
        #   Strings (a Symbol key wins when both spellings are present)
        # @return [Array<Hash>] messages; nodes whose rendered content is
        #   blank produce no message
        def render(ast, variables: {})
          lookup = normalize_variables(variables)
          ast.each_with_object([]) do |node, messages|
            render_node(node, lookup, messages)
          end
        end

        private

        # Placeholders are looked up by Symbol, so String-keyed variables
        # would otherwise be silently ignored. Non-Hash arguments are passed
        # through untouched.
        def normalize_variables(variables)
          return variables unless variables.is_a?(Hash)

          variables.each_with_object({}) do |(key, value), lookup|
            name = key.respond_to?(:to_sym) ? key.to_sym : key
            lookup[name] = value if key.is_a?(Symbol) || !lookup.key?(name)
          end
        end

        # Maps a node onto messages. System and rule nodes become system
        # messages, an example becomes a user/assistant pair, and node
        # classes not listed here are skipped.
        def render_node(node, variables, messages)
          case node
          when Nodes::SystemNode, Nodes::RuleNode
            append_message(messages, :system, node.content, variables)
          when Nodes::ExampleNode
            append_message(messages, :user, node.input, variables)
            append_message(messages, :assistant, node.output, variables)
          when Nodes::UserNode
            append_message(messages, :user, node.content, variables)
          when Nodes::SectionNode
            render_section_node(node, variables, messages)
          end
        end

        # Interpolates +raw_content+ and appends it unless the result is blank.
        def append_message(messages, role, raw_content, variables)
          content = interpolate(raw_content, variables)
          messages << { role: role, content: content } if content_present?(content)
        end

        # Sections render as a system message of the form "[name]\nbody".
        # Hash/Array content is serialised to JSON before interpolation.
        def render_section_node(node, variables, messages)
          section_content = node.content.is_a?(Hash) || node.content.is_a?(Array) ? node.content.to_json : node.content
          return unless content_present?(section_content)

          safe_name = sanitize_section_name(node.name)
          body = interpolate(section_content, variables)
          messages << { role: :system, content: "[#{safe_name}]\n#{body}" }
        end

        def content_present?(content)
          !content.to_s.strip.empty?
        end

        # Brackets and line breaks are replaced with spaces so the name cannot
        # break out of the "[name]" header line.
        def sanitize_section_name(name)
          name.to_s.gsub(/[\[\]\n\r]/, " ").strip
        end

        # Replaces every +{name}+ whose name is a known variable; unknown
        # placeholders are left as written. nil passes through and Hash/Array
        # content is returned as JSON without interpolation.
        def interpolate(text, variables)
          return text if text.nil?
          return text.to_json if text.is_a?(Hash) || text.is_a?(Array)

          # Coerce non-String content (Integer, Symbol, etc.) to String before gsub
          text = text.to_s unless text.is_a?(String)

          text.gsub(PLACEHOLDER) do |match|
            key = ::Regexp.last_match(1).to_sym
            variables.key?(key) ? serialize_value(variables[key]) : match
          end
        end

        def serialize_value(value)
          value.is_a?(Hash) || value.is_a?(Array) ? value.to_json : value.to_s
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Contract
    # Rails integration that loads eval definitions.
    #
    # Eval files (e.g. classify_threads_eval.rb) call define_eval on an
    # existing Step class rather than defining Zeitwerk-compatible constants,
    # so they are loaded explicitly through RubyLLM::Contract.load_evals!.
    class Railtie < ::Rails::Railtie
      # Load once the application has finished initializing.
      config.after_initialize { RubyLLM::Contract.load_evals! }

      # Load again whenever Rails prepares/reloads code in development
      # (Spring, zeitwerk:check, etc.).
      config.to_prepare { RubyLLM::Contract.load_evals! }
    end
  end
end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rake"
4
+ require "rake/tasklib"
5
+
6
module RubyLLM
  module Contract
    # Defines a Rake task that runs every registered eval and aborts the
    # build when a quality or budget gate is missed.
    #
    # Configure it through the block passed to +new+:
    #
    #   RubyLLM::Contract::RakeTask.new do |t|
    #     t.minimum_score = 0.8
    #     t.maximum_cost  = 0.05
    #     t.eval_dirs     = ["evals"]
    #   end
    class RakeTask < ::Rake::TaskLib
      attr_accessor :name, :context, :fail_on_empty, :minimum_score, :maximum_cost, :eval_dirs

      # @param name [Symbol, String] task name
      # @yieldparam task [RakeTask] the instance, for configuration
      def initialize(name = :"ruby_llm_contract:eval", &block)
        super()
        @name = name
        @context = {}
        @fail_on_empty = true
        @minimum_score = nil # nil = require 100%; float = threshold
        @maximum_cost = nil  # nil = no cost limit; float = budget cap (suite-level)
        @eval_dirs = []      # directories to load eval files from (non-Rails)
        block&.call(self)
        define_task
      end

      private

      # Registers the task; it depends on :environment when such a task is
      # already defined at registration time.
      def define_task
        desc "Run all ruby_llm-contract evals"
        task(@name => task_prerequisites) { run_suite }
      end

      # Task body: load eval files, run them, print the reports and enforce
      # the gates. The budget check runs before the score check.
      def run_suite
        require "ruby_llm/contract" # deliberately lazy: only needed when the task runs
        load_eval_files

        results = RubyLLM::Contract.run_all_evals(context: @context)
        return handle_empty_suite if results.empty?

        gate_passed, suite_cost = summarize(results)
        enforce_budget(suite_cost)

        abort "\nEval suite FAILED" unless gate_passed
        puts "\nAll evals passed."
      end

      # Loads every configured directory, then the default location.
      # Array() lets +eval_dirs+ be a single path as well as a list.
      def load_eval_files
        Array(@eval_dirs).each { |dir| RubyLLM::Contract.load_evals!(dir) }
        RubyLLM::Contract.load_evals!
      end

      # An empty suite aborts unless +fail_on_empty+ was switched off.
      def handle_empty_suite
        abort "No evals defined. Define evals with define_eval or set fail_on_empty = false." if @fail_on_empty

        puts "No evals defined."
      end

      # Prints each host's reports.
      #
      # @return [Array(Boolean, Float)] whether every report met the score
      #   gate, and the summed cost of all reports
      def summarize(results)
        gate_passed = true
        suite_cost = 0.0

        results.each do |host, reports|
          puts "\n#{host.name || host.to_s}"
          reports.each_value do |report|
            report.print_summary
            suite_cost += report.total_cost
            gate_passed = false unless report_meets_score?(report)
          end
        end

        [gate_passed, suite_cost]
      end

      # Aborts when a budget is configured and the suite exceeded it.
      def enforce_budget(suite_cost)
        return unless @maximum_cost && suite_cost > @maximum_cost

        abort "\nEval suite FAILED: total cost $#{format("%.4f", suite_cost)} " \
              "exceeds budget $#{format("%.4f", @maximum_cost)}"
      end

      # With a minimum score the report's score is compared against it;
      # without one the report itself decides via +passed?+.
      def report_meets_score?(report)
        if @minimum_score
          report.score >= @minimum_score
        else
          report.passed?
        end
      end

      def task_prerequisites
        Rake::Task.task_defined?(:environment) ? [:environment] : []
      end
    end
  end
end
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Contract
    module RSpec
      # Message builders used by the pass_eval matcher, kept here so the
      # matcher block itself stays short.
      module PassEvalHelpers
        # Picks the error message when the eval raised, the report message
        # otherwise.
        def format_failure_message(eval_name, error, report, minimum_score, maximum_cost)
          if error
            format_error_message(eval_name, error)
          else
            format_report_message(eval_name, report, minimum_score, maximum_cost)
          end
        end

        # Message for an eval that raised instead of producing a report.
        def format_error_message(eval_name, error)
          "expected #{eval_name} eval to pass, but it raised an error:\n #{error.class}: #{error.message}"
        end

        # Header line, a blank line, then one line per case result; a failed
        # result that carries details gets an extra line with them.
        def format_report_message(eval_name, report, minimum_score, maximum_cost)
          header = build_header(eval_name, report, minimum_score, maximum_cost)

          case_lines = report.results.flat_map do |result|
            cost_suffix = result.cost ? " $#{format("%.4f", result.cost)}" : ""
            summary = " #{result.label} #{result.name} (score: #{result.score})#{cost_suffix}"
            result.details && result.failed? ? [summary, " #{result.details}"] : [summary]
          end

          [*header, "", *case_lines].join("\n")
        end

        private

        # One-element array holding the headline. A blown budget takes
        # precedence over the score wording.
        def build_header(eval_name, report, minimum_score, maximum_cost)
          cost_note = report.total_cost.positive? ? ", cost: $#{format("%.4f", report.total_cost)}" : ""

          headline =
            if maximum_cost && report.total_cost > maximum_cost
              "expected #{eval_name} eval cost <= $#{format("%.4f", maximum_cost)}, " \
                "but got: $#{format("%.4f", report.total_cost)} (#{report.pass_rate})"
            elsif minimum_score
              "expected #{eval_name} eval score >= #{minimum_score}, " \
                "but got: #{report.score.round(2)} (#{report.pass_rate}#{cost_note})"
            else
              "expected #{eval_name} eval to pass, " \
                "but got score: #{report.score.round(2)} (#{report.pass_rate}#{cost_note})"
            end

          [headline]
        end
      end
    end
  end
end
51
+
52
# Matcher: expect(step_or_pipeline).to pass_eval(:name)
#
# Runs the named eval through +run_eval+ and passes when the report meets the
# score gate (+with_minimum_score+, or +report.passed?+ when none is set) and
# the optional cost gate (+with_maximum_cost+). +with_context+ supplies the
# context handed to +run_eval+.
RSpec::Matchers.define :pass_eval do |eval_name|
  include RubyLLM::Contract::RSpec::PassEvalHelpers

  chain(:with_context) { |ctx| @context = ctx }
  chain(:with_minimum_score) { |score| @minimum_score = score }
  chain(:with_maximum_cost) { |cost| @maximum_cost = cost }

  # Positive form: an error raised while running the eval is captured and
  # reported through the failure message instead of escaping the matcher.
  match do |step_or_pipeline|
    @eval_name = eval_name
    @error = nil
    eval_gates_met?(step_or_pipeline)
  rescue StandardError => e
    @error = e
    false
  end

  # Negated form: errors are NOT rescued here. Reusing the positive block
  # would turn a raised error (e.g. a mistyped eval name) into "did not
  # pass", making `expect(...).not_to pass_eval(...)` succeed without the
  # eval ever having run.
  match_when_negated do |step_or_pipeline|
    @eval_name = eval_name
    @error = nil
    !eval_gates_met?(step_or_pipeline)
  end

  failure_message do
    format_failure_message(@eval_name, @error, @report, @minimum_score, @maximum_cost)
  end

  failure_message_when_negated do
    "expected #{@eval_name} eval NOT to pass, but it passed with score: #{@report.score.round(2)}"
  end

  # Runs the eval, stores the report in @report for the failure messages and
  # returns whether both the score gate and the cost gate are satisfied.
  def eval_gates_met?(step_or_pipeline)
    @context ||= {}
    @report = step_or_pipeline.run_eval(@eval_name, context: @context)

    score_ok = @minimum_score ? @report.score >= @minimum_score : @report.passed?
    cost_ok = @maximum_cost ? @report.total_cost <= @maximum_cost : true

    score_ok && cost_ok
  end
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
# Matcher: expect(result).to satisfy_contract
#
# Passes when the object responds to +ok?+ and +ok?+ is truthy. The failure
# message lists validation errors and a truncated raw output when the object
# exposes them.
RSpec::Matchers.define :satisfy_contract do
  match do |result|
    @result = result
    result.respond_to?(:ok?) && result.ok?
  end

  failure_message do
    # The match block accepts any object, so the message must not assume the
    # result interface: nil or an unrelated object has no #status.
    lines = if @result.respond_to?(:status)
              ["expected step result to satisfy contract, but got status: #{@result.status}"]
            else
              ["expected step result to satisfy contract, but got: #{@result.inspect}"]
            end

    # Array() tolerates a nil validation_errors.
    errors = @result.respond_to?(:validation_errors) ? Array(@result.validation_errors) : []
    if errors.any?
      lines << ""
      lines << "Validation errors:"
      errors.each { |e| lines << " - #{e}" }
    end

    if @result.respond_to?(:raw_output) && @result.raw_output
      output = @result.raw_output.to_s
      # Keep the message readable: show at most the first 200 characters.
      output = "#{output[0, 200]}..." if output.size > 200
      lines << ""
      lines << "Raw output: #{output}"
    end

    lines.join("\n")
  end

  failure_message_when_negated do
    "expected step result NOT to satisfy contract, but it passed with status: :ok"
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ruby_llm/contract"
4
+
5
+ require_relative "rspec/satisfy_contract"
6
+ require_relative "rspec/pass_eval"
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Contract
    module Step
      # Base class for steps. Everything here is class-level API: a step is
      # configured through the mixed-in DSL and executed with +run+.
      class Base
        # Registers a new subclass as an eval host when the class being
        # inherited from already reports +eval_defined?+.
        def self.inherited(subclass)
          super
          return unless respond_to?(:eval_defined?) && eval_defined?

          Contract.register_eval_host(subclass)
        end

        class << self
          include Concerns::EvalHost
          include RetryExecutor
          include Dsl

          # Context keys +run+ accepts without emitting a warning.
          KNOWN_CONTEXT_KEYS = %i[adapter model temperature max_tokens schema provider assume_model_exists].freeze

          # Evaluates one ad-hoc case against this step.
          #
          # @return [Object] the first (only) result of the eval report
          def eval_case(input:, expected: nil, expected_traits: nil, evaluator: nil, context: {})
            case_options = {
              input: input, expected: expected,
              expected_traits: expected_traits, evaluator: evaluator
            }
            dataset = Eval::Dataset.define("single_case") { add_case("inline", **case_options) }

            Eval::Runner.run(step: self, dataset: dataset, context: context).results.first
          end

          # Estimates the cost of one call without contacting a model. Output
          # tokens are taken from +max_output+, falling back to 256.
          #
          # @return [Hash, nil] model, token counts and estimated cost; nil
          #   when the cost calculator does not know the model
          def estimate_cost(input:, model: nil)
            model_name = model || RubyLLM::Contract.configuration.default_model
            input_tokens = TokenEstimator.estimate(build_messages(input))
            output_tokens = max_output || 256 # conservative default

            model_info = CostCalculator.send(:find_model, model_name)
            return nil unless model_info

            usage = { input_tokens: input_tokens, output_tokens: output_tokens }
            {
              model: model_name,
              input_tokens: input_tokens,
              output_tokens_estimate: output_tokens,
              estimated_cost: CostCalculator.send(:compute_cost, model_info, usage)
            }
          end

          # Estimates the cost of running a whole eval, per model. Cases whose
          # cost cannot be estimated count as 0.0.
          #
          # @return [Hash] model name => summed estimate, rounded to 6 places
          # @raise [ArgumentError] when no eval with that name is defined
          def estimate_eval_cost(eval_name, models: nil)
            definition = send(:all_eval_definitions)[eval_name.to_s]
            raise ArgumentError, "No eval '#{eval_name}' defined" unless definition

            model_list = models || [RubyLLM::Contract.configuration.default_model].compact
            cases = definition.build_dataset.cases

            model_list.each_with_object({}) do |model_name, totals|
              suite_total = cases.sum do |dataset_case|
                estimate = estimate_cost(input: dataset_case.input, model: model_name)
                estimate ? estimate[:estimated_cost] : 0.0
              end
              totals[model_name] = suite_total.round(6)
            end
          end

          # Executes the step for +input+, going through the retry path when a
          # retry policy is configured and through a single run otherwise.
          def run(input, context: {})
            warn_unknown_context_keys(context)
            adapter = resolve_adapter(context)
            default_model = context[:model] || RubyLLM::Contract.configuration.default_model
            policy = retry_policy
            return run_once(input, adapter: adapter, model: default_model) unless policy

            run_with_retry(input, adapter: adapter, default_model: default_model, policy: policy)
          end

          private

          # Warns about context keys outside KNOWN_CONTEXT_KEYS.
          def warn_unknown_context_keys(context)
            unknown = context.keys - KNOWN_CONTEXT_KEYS
            return if unknown.empty?

            warn "[ruby_llm-contract] Unknown context keys: #{unknown.inspect}. " \
                 "Known keys: #{KNOWN_CONTEXT_KEYS.inspect}"
          end

          # The context's adapter wins over the configured default.
          #
          # @raise [RubyLLM::Contract::Error] when neither is set
          def resolve_adapter(context)
            context[:adapter] ||
              RubyLLM::Contract.configuration.default_adapter ||
              raise(RubyLLM::Contract::Error, "No adapter configured. Set one with RubyLLM::Contract.configure " \
                                              "{ |c| c.default_adapter = ... } or pass context: { adapter: ... }")
          end

          # Single attempt. Any ArgumentError raised while building or calling
          # the runner is turned into an :input_error result.
          def run_once(input, adapter:, model:)
            runner_options = {
              input_type: input_type, output_type: output_type,
              prompt_block: prompt, contract_definition: effective_contract,
              adapter: adapter, model: model, output_schema: output_schema,
              max_output: max_output, max_input: max_input, max_cost: max_cost
            }
            Runner.new(**runner_options).call(input)
          rescue ArgumentError => e
            Result.new(status: :input_error, raw_output: nil, parsed_output: nil,
                       validation_errors: [e.message])
          end

          # The contract with class-level validations merged in. A :json parse
          # strategy is inferred for Hash/Array-like output types, but only
          # offered as an override when no contract definition is set on this
          # class.
          def effective_contract
            base = contract
            extra = class_validates
            inferred_parse = json_compatible_type?(output_type) ? :json : nil

            return base if extra.empty? && inferred_parse.nil?

            has_own_contract = defined?(@contract_definition) && @contract_definition
            parse_override = has_own_contract ? nil : inferred_parse

            Definition.merge(base, extra_invariants: extra, parse_override: parse_override)
          end

          # Builds and renders the prompt. A prompt block that takes a
          # parameter receives the input directly and renders without
          # variables; otherwise the input is exposed as {input} and, for Hash
          # input, one variable per (symbolized) key.
          def build_messages(input)
            if prompt.arity >= 1
              ast = Prompt::Builder.build(input: input, &prompt)
              variables = {}
            else
              ast = Prompt::Builder.build(input: nil, &prompt)
              variables = { input: input }
              variables.merge!(input.transform_keys(&:to_sym)) if input.is_a?(Hash)
            end

            Prompt::Renderer.render(ast, variables: variables)
          end

          # True for the Hash/Array types themselves and for any type whose
          # name mentions Hash or Array.
          def json_compatible_type?(type)
            return true if type == RubyLLM::Contract::Types::Hash || type == Hash
            return true if type == RubyLLM::Contract::Types::Array || type == Array

            type.respond_to?(:name) && type.name&.match?(/Hash|Array/)
          end
        end
      end
    end
  end
end