ruby_llm-contract 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +55 -0
  4. data/CHANGELOG.md +76 -0
  5. data/Gemfile +11 -0
  6. data/Gemfile.lock +176 -0
  7. data/LICENSE +21 -0
  8. data/README.md +154 -0
  9. data/Rakefile +8 -0
  10. data/examples/00_basics.rb +500 -0
  11. data/examples/01_classify_threads.rb +220 -0
  12. data/examples/02_generate_comment.rb +203 -0
  13. data/examples/03_target_audience.rb +201 -0
  14. data/examples/04_real_llm.rb +410 -0
  15. data/examples/05_output_schema.rb +258 -0
  16. data/examples/07_keyword_extraction.rb +239 -0
  17. data/examples/08_translation.rb +353 -0
  18. data/examples/09_eval_dataset.rb +287 -0
  19. data/examples/10_reddit_full_showcase.rb +363 -0
  20. data/examples/README.md +140 -0
  21. data/lib/ruby_llm/contract/adapters/base.rb +13 -0
  22. data/lib/ruby_llm/contract/adapters/response.rb +17 -0
  23. data/lib/ruby_llm/contract/adapters/ruby_llm.rb +94 -0
  24. data/lib/ruby_llm/contract/adapters/test.rb +44 -0
  25. data/lib/ruby_llm/contract/adapters.rb +6 -0
  26. data/lib/ruby_llm/contract/concerns/deep_symbolize.rb +17 -0
  27. data/lib/ruby_llm/contract/concerns/eval_host.rb +109 -0
  28. data/lib/ruby_llm/contract/concerns/trace_equality.rb +15 -0
  29. data/lib/ruby_llm/contract/concerns/usage_aggregator.rb +43 -0
  30. data/lib/ruby_llm/contract/configuration.rb +21 -0
  31. data/lib/ruby_llm/contract/contract/definition.rb +39 -0
  32. data/lib/ruby_llm/contract/contract/invariant.rb +23 -0
  33. data/lib/ruby_llm/contract/contract/parser.rb +143 -0
  34. data/lib/ruby_llm/contract/contract/schema_validator.rb +239 -0
  35. data/lib/ruby_llm/contract/contract/validator.rb +104 -0
  36. data/lib/ruby_llm/contract/contract.rb +7 -0
  37. data/lib/ruby_llm/contract/cost_calculator.rb +38 -0
  38. data/lib/ruby_llm/contract/dsl.rb +13 -0
  39. data/lib/ruby_llm/contract/errors.rb +19 -0
  40. data/lib/ruby_llm/contract/eval/case_result.rb +76 -0
  41. data/lib/ruby_llm/contract/eval/contract_detail_builder.rb +47 -0
  42. data/lib/ruby_llm/contract/eval/dataset.rb +53 -0
  43. data/lib/ruby_llm/contract/eval/eval_definition.rb +112 -0
  44. data/lib/ruby_llm/contract/eval/evaluation_result.rb +27 -0
  45. data/lib/ruby_llm/contract/eval/evaluator/exact.rb +20 -0
  46. data/lib/ruby_llm/contract/eval/evaluator/json_includes.rb +58 -0
  47. data/lib/ruby_llm/contract/eval/evaluator/proc_evaluator.rb +40 -0
  48. data/lib/ruby_llm/contract/eval/evaluator/regex.rb +27 -0
  49. data/lib/ruby_llm/contract/eval/model_comparison.rb +80 -0
  50. data/lib/ruby_llm/contract/eval/pipeline_result_adapter.rb +15 -0
  51. data/lib/ruby_llm/contract/eval/report.rb +115 -0
  52. data/lib/ruby_llm/contract/eval/runner.rb +162 -0
  53. data/lib/ruby_llm/contract/eval/trait_evaluator.rb +75 -0
  54. data/lib/ruby_llm/contract/eval.rb +16 -0
  55. data/lib/ruby_llm/contract/pipeline/base.rb +62 -0
  56. data/lib/ruby_llm/contract/pipeline/result.rb +131 -0
  57. data/lib/ruby_llm/contract/pipeline/runner.rb +139 -0
  58. data/lib/ruby_llm/contract/pipeline/trace.rb +72 -0
  59. data/lib/ruby_llm/contract/pipeline.rb +6 -0
  60. data/lib/ruby_llm/contract/prompt/ast.rb +38 -0
  61. data/lib/ruby_llm/contract/prompt/builder.rb +47 -0
  62. data/lib/ruby_llm/contract/prompt/node.rb +25 -0
  63. data/lib/ruby_llm/contract/prompt/nodes/example_node.rb +27 -0
  64. data/lib/ruby_llm/contract/prompt/nodes/rule_node.rb +15 -0
  65. data/lib/ruby_llm/contract/prompt/nodes/section_node.rb +26 -0
  66. data/lib/ruby_llm/contract/prompt/nodes/system_node.rb +15 -0
  67. data/lib/ruby_llm/contract/prompt/nodes/user_node.rb +15 -0
  68. data/lib/ruby_llm/contract/prompt/nodes.rb +7 -0
  69. data/lib/ruby_llm/contract/prompt/renderer.rb +76 -0
  70. data/lib/ruby_llm/contract/railtie.rb +20 -0
  71. data/lib/ruby_llm/contract/rake_task.rb +78 -0
  72. data/lib/ruby_llm/contract/rspec/pass_eval.rb +96 -0
  73. data/lib/ruby_llm/contract/rspec/satisfy_contract.rb +31 -0
  74. data/lib/ruby_llm/contract/rspec.rb +6 -0
  75. data/lib/ruby_llm/contract/step/base.rb +138 -0
  76. data/lib/ruby_llm/contract/step/dsl.rb +144 -0
  77. data/lib/ruby_llm/contract/step/limit_checker.rb +64 -0
  78. data/lib/ruby_llm/contract/step/result.rb +38 -0
  79. data/lib/ruby_llm/contract/step/retry_executor.rb +90 -0
  80. data/lib/ruby_llm/contract/step/retry_policy.rb +76 -0
  81. data/lib/ruby_llm/contract/step/runner.rb +126 -0
  82. data/lib/ruby_llm/contract/step/trace.rb +70 -0
  83. data/lib/ruby_llm/contract/step.rb +10 -0
  84. data/lib/ruby_llm/contract/token_estimator.rb +19 -0
  85. data/lib/ruby_llm/contract/types.rb +11 -0
  86. data/lib/ruby_llm/contract/version.rb +7 -0
  87. data/lib/ruby_llm/contract.rb +108 -0
  88. data/ruby_llm-contract.gemspec +33 -0
  89. metadata +172 -0
@@ -0,0 +1,162 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Contract
    module Eval
      # Runs every case of a dataset through a step and collects the per-case
      # outcomes into a Report. Results that do not respond to +parsed_output+
      # are treated as pipeline results and wrapped in a PipelineResultAdapter.
      class Runner
        include TraitEvaluator
        include ContractDetailBuilder

        # Builds a runner and executes it in one call.
        #
        # @param step [#run] receives +run(input, context:)+ once per case
        # @param dataset [#cases, #name] source of the test cases
        # @param context [Hash] passed unchanged to every +step.run+ call
        # @return [Report]
        def self.run(step:, dataset:, context: {})
          new(step: step, dataset: dataset, context: context).run
        end

        def initialize(step:, dataset:, context: {})
          @step = step
          @dataset = dataset
          @context = context
        end

        # Evaluates each dataset case in order.
        #
        # @return [Report] named after the dataset, holding one result per case
        def run
          results = @dataset.cases.map { |test_case| evaluate_case(test_case) }
          Report.new(dataset_name: @dataset.name, results: results)
        end

        private

        # Runs one case: execute the step, normalize the result, evaluate it and
        # wrap everything in a CaseResult.
        def evaluate_case(test_case)
          run_result = @step.run(test_case.input, context: @context)
          step_result = normalize_result(run_result)
          eval_result = dispatch_evaluation(step_result, test_case)

          build_case_result(test_case, step_result, eval_result)
        rescue RubyLLM::Contract::Error => e
          # Any contract error raised above turns the case into a SKIP entry
          # instead of aborting the whole run. The motivating scenario is
          # offline mode: no adapter configured and no sample_response.
          skipped_result(test_case, e.message)
        end

        def build_case_result(test_case, step_result, eval_result)
          trace = step_result.respond_to?(:trace) ? step_result.trace : nil
          CaseResult.new(
            name: test_case.name,
            input: test_case.input,
            output: step_result.parsed_output,
            expected: test_case.expected,
            step_status: step_result.status,
            score: eval_result.score,
            passed: eval_result.passed,
            label: eval_result.label,
            details: eval_result.details,
            duration_ms: extract_latency(trace),
            cost: extract_cost(trace)
          )
        end

        def extract_latency(trace)
          return nil unless trace

          # Pipeline::Trace uses total_latency_ms, Step::Trace uses latency_ms
          if trace.respond_to?(:total_latency_ms)
            trace.total_latency_ms
          else
            trace[:latency_ms]
          end
        end

        def extract_cost(trace)
          return nil unless trace

          # Pipeline::Trace uses total_cost, Step::Trace uses cost
          if trace.respond_to?(:total_cost)
            trace.total_cost
          else
            trace[:cost]
          end
        end

        # Picks the evaluation strategy for a case. Precedence: failed step,
        # custom evaluator, expected_traits, expected value, contract only.
        def dispatch_evaluation(step_result, test_case)
          return contract_failure(step_result) unless step_result.ok?

          if test_case.evaluator
            evaluate_with_custom(step_result, test_case)
          elsif test_case.expected_traits
            evaluate_traits(step_result, test_case)
          elsif !test_case.expected.nil?
            # nil means "no expectation"; an explicit +false+ is a real
            # expected value and must still be compared against the output.
            evaluate_expected(step_result, test_case)
          else
            evaluate_contract_only
          end
        end

        def normalize_result(result)
          return result if result.respond_to?(:parsed_output)

          normalize_pipeline_result(result)
        end

        # Adapts a pipeline result to the step-result interface used above.
        # The output is the last recorded step output, and only when the
        # pipeline succeeded.
        def normalize_pipeline_result(result)
          last_result = result.step_results&.last&.dig(:result)
          is_ok = result.ok?
          pipeline_trace = result.respond_to?(:trace) ? result.trace : nil

          PipelineResultAdapter.new(
            status: result.status,
            ok_flag: is_ok,
            parsed_output: is_ok ? result.outputs_by_step.values.last : nil,
            validation_errors: last_result.respond_to?(:validation_errors) ? last_result.validation_errors : [],
            trace: pipeline_trace || (last_result.respond_to?(:trace) ? last_result.trace : {})
          )
        end

        def evaluate_expected(step_result, test_case)
          dispatch_expected_evaluator(
            output: step_result.parsed_output,
            expected: test_case.expected,
            input: test_case.input
          )
        end

        # Hash expectations use JsonIncludes, Regexp expectations use Regex,
        # everything else is compared with Exact.
        def dispatch_expected_evaluator(output:, expected:, input:)
          if expected.is_a?(Hash)
            Evaluator::JsonIncludes.new.call(output: output, expected: expected, input: input)
          elsif expected.is_a?(::Regexp)
            Evaluator::Regex.new(expected).call(output: output, input: input)
          else
            Evaluator::Exact.new.call(output: output, expected: expected, input: input)
          end
        end

        def evaluate_with_custom(step_result, test_case)
          evaluator = test_case.evaluator
          evaluator = Evaluator::ProcEvaluator.new(evaluator) if evaluator.is_a?(::Proc)
          evaluator.call(output: step_result.parsed_output, expected: test_case.expected, input: test_case.input)
        end

        def evaluate_contract_only
          EvaluationResult.new(score: 1.0, passed: true, details: build_contract_details)
        end

        def contract_failure(step_result)
          # Array() keeps the message buildable when validation_errors is nil.
          errors = Array(step_result.validation_errors)
          EvaluationResult.new(
            score: 0.0, passed: false,
            details: "step failed: #{step_result.status} — #{errors.join(", ")}"
          )
        end

        def skipped_result(test_case, reason)
          CaseResult.new(
            name: test_case.name,
            input: test_case.input,
            output: nil,
            expected: test_case.expected,
            step_status: :skipped,
            score: 0.0,
            passed: false,
            label: "SKIP",
            details: "skipped: #{reason}"
          )
        end
      end
    end
  end
end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Contract
    module Eval
      # Mixin with the expected_traits checks, split out of Runner to keep
      # that class short. Each trait is compared with the value stored under
      # the same key of the parsed output.
      module TraitEvaluator
        private

        # Checks every expected trait and folds the failures into one result.
        def evaluate_traits(step_result, test_case)
          parsed = step_result.parsed_output
          expectations = test_case.expected_traits
          failures = []
          expectations.each do |name, expectation|
            check_trait(parsed, name, expectation, failures)
          end

          build_trait_result(failures, expectations.length)
        end

        # Appends a message to +errors+ when the trait does not hold.
        # Non-Hash outputs are treated as having no value for any key.
        def check_trait(output, key, expectation, errors)
          actual = output[key] if output.is_a?(Hash)
          message = trait_error(key, actual, expectation)
          errors << message if message
        end

        # Returns an error message for a failed trait, or nil when it holds.
        def trait_error(key, value, expectation)
          case expectation
          when ::Regexp then trait_regexp_error(key, value, expectation)
          when Range then trait_range_error(key, value, expectation)
          when true then trait_truthy_error(key, value)
          when false then trait_falsy_error(key, value)
          else trait_equality_error(key, value, expectation)
          end
        end

        def trait_regexp_error(key, value, expectation)
          return if value.to_s.match?(expectation)

          "#{key}: does not match #{expectation.inspect}"
        end

        # Numbers are compared directly; anything else by its string length.
        def trait_range_error(key, value, expectation)
          measured = value.is_a?(Numeric) ? value : value.to_s.length
          return if expectation.include?(measured)

          "#{key}: #{value.inspect} not in #{expectation}"
        end

        def trait_truthy_error(key, value)
          return if value

          "#{key}: expected truthy, got #{value.inspect}"
        end

        def trait_falsy_error(key, value)
          return unless value

          "#{key}: expected falsy, got #{value.inspect}"
        end

        def trait_equality_error(key, value, expectation)
          return if value == expectation

          "#{key}: expected #{expectation.inspect}, got #{value.inspect}"
        end

        # Score is the fraction of traits that matched; any failure means the
        # case does not pass.
        def build_trait_result(errors, trait_count)
          return EvaluationResult.new(score: 1.0, passed: true, details: "all traits match") if errors.empty?

          matched = trait_count - errors.length
          score = trait_count.zero? ? 0.0 : matched.to_f / trait_count
          EvaluationResult.new(score: score, passed: false, details: errors.join("; "))
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "eval/evaluation_result"
4
+ require_relative "eval/case_result"
5
+ require_relative "eval/evaluator/exact"
6
+ require_relative "eval/evaluator/regex"
7
+ require_relative "eval/evaluator/json_includes"
8
+ require_relative "eval/evaluator/proc_evaluator"
9
+ require_relative "eval/dataset"
10
+ require_relative "eval/pipeline_result_adapter"
11
+ require_relative "eval/trait_evaluator"
12
+ require_relative "eval/contract_detail_builder"
13
+ require_relative "eval/runner"
14
+ require_relative "eval/report"
15
+ require_relative "eval/eval_definition"
16
+ require_relative "eval/model_comparison"
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Contract
    module Pipeline
      # Base class for pipelines. Subclasses declare their steps with the
      # class-level +step+ macro and are executed through +run+ or +test+.
      class Base
        # Registers the new subclass as an eval host when the class being
        # inherited from responds to +eval_defined?+ and reports true.
        def self.inherited(subclass)
          super
          return unless respond_to?(:eval_defined?) && eval_defined?

          Contract.register_eval_host(subclass)
        end

        class << self
          include Concerns::EvalHost

          # Declares one pipeline step.
          #
          # depends_on is accepted for forward compatibility with DAG pipelines (v0.3).
          # Currently, execution is always linear in declaration order.
          def step(step_class, as:, depends_on: nil, model: nil)
            validate_dependency!(depends_on) if depends_on
            entry = { step_class: step_class, alias: as, depends_on: depends_on, model: model }
            steps_registry.push(entry)
          end

          # Frozen snapshot of the declared steps.
          def steps
            snapshot = steps_registry.dup
            snapshot.freeze
          end

          # Internal mutable steps list for registration
          def steps_registry
            @steps_registry ||= []
          end

          # With an argument, stores the budget; always returns the stored value.
          def token_budget(limit = nil)
            @token_budget = limit if limit
            @token_budget
          end

          # Executes the declared steps against +input+ through a Runner.
          def run(input, context: {}, timeout_ms: nil)
            runner = Runner.new(steps: steps, context: context, timeout_ms: timeout_ms, token_budget: token_budget)
            runner.call(input)
          end

          # Runs the pipeline against a test adapter. +responses+ maps step
          # aliases to canned responses; steps without an entry get "".
          def test(input, responses: {}, timeout_ms: nil)
            canned = steps.map { |entry| responses.fetch(entry[:alias], "") }
            run(input, context: { adapter: Adapters::Test.new(responses: canned) }, timeout_ms: timeout_ms)
          end

          private

          def known_step_aliases
            steps_registry.map { |entry| entry[:alias] }
          end

          # Raises ArgumentError unless +dep+ is the alias of an already declared step.
          def validate_dependency!(dep)
            aliases = known_step_aliases
            return if aliases.include?(dep)

            raise ArgumentError, "Unknown dependency: #{dep.inspect}. Known steps: #{aliases.inspect}"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,131 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Contract
    module Pipeline
      # Frozen outcome of a pipeline run: overall status, the per-step result
      # records, the outputs keyed by step alias, the alias of the failed step
      # (if any) and the pipeline trace.
      class Result
        attr_reader :status, :step_results, :outputs_by_step, :failed_step, :trace

        # Column widths for pretty_print table
        COL1 = 14 # step name
        COL2 = 10 # status
        COL3 = 50 # output

        TOP_BORDER = "+#{"-" * (COL1 + COL2 + COL3 + 8)}+".freeze
        MID_BORDER = "+-#{"-" * COL1}-+-#{"-" * COL2}-+-#{"-" * COL3}-+".freeze

        # Text width of the full-width header row. Derived from the border so
        # that "| text |" is exactly as wide as every other table line.
        HEADER_WIDTH = TOP_BORDER.size - 4

        # @param status [Symbol] :ok on success, any other value on failure
        # @param step_results [Array<Hash>] records with :alias and :result keys;
        #   the array and each record are frozen in place
        # @param outputs_by_step [Hash] step alias => output; frozen in place
        # @param failed_step [Object, nil] alias of the step that failed
        # @param trace [Object, nil] pipeline trace used for the summary line
        def initialize(status:, step_results:, outputs_by_step:, failed_step: nil, trace: Trace.new)
          @status = status
          @step_results = step_results.each(&:freeze).freeze
          @outputs_by_step = outputs_by_step.freeze
          @failed_step = failed_step
          @trace = trace
          freeze
        end

        def ok?
          @status == :ok
        end

        def failed?
          @status != :ok
        end

        # Summary line followed by one line per step.
        def to_s
          lines = [header_line]
          @step_results.each { |sr| lines << step_line(sr) }
          lines.join("\n")
        end

        # Writes the result as an ASCII table, one line per +io.puts+ call.
        def pretty_print(io = $stdout)
          build_table.each { |line| io.puts line }
        end

        private

        def build_table
          [TOP_BORDER,
           "| #{header_line.ljust(HEADER_WIDTH)} |",
           MID_BORDER,
           "| #{"Step".ljust(COL1)} | #{"Status".ljust(COL2)} | #{"Output".ljust(COL3)} |",
           MID_BORDER,
           *build_step_rows,
           TOP_BORDER]
        end

        # Rows for every step, with a separator between consecutive steps.
        def build_step_rows
          rows = []
          @step_results.each_with_index do |sr, idx|
            rows.concat(build_single_step_rows(sr))
            rows << MID_BORDER if idx < @step_results.size - 1
          end
          rows
        end

        # The first output line shares a row with the alias and status; the
        # remaining output lines go on continuation rows.
        def build_single_step_rows(step_record)
          step_alias = step_record[:alias].to_s
          status_str = step_status(step_record[:result])
          output_lines = format_output(@outputs_by_step[step_record[:alias]])
          first_row = build_first_step_row(step_alias, status_str, output_lines.first || "")
          continuation_rows = build_continuation_rows(output_lines.drop(1))

          [first_row, *continuation_rows]
        end

        def build_first_step_row(step_alias, status_str, first_line)
          "| #{step_alias.ljust(COL1)} | #{status_str.ljust(COL2)} | #{first_line.ljust(COL3)} |"
        end

        def build_continuation_rows(lines)
          blank_prefix = "| #{" " * COL1} | #{" " * COL2} | "
          lines.map { |line| "#{blank_prefix}#{line.ljust(COL3)} |" }
        end

        def header_line
          parts = ["Pipeline: #{@status}"]
          append_trace_details(parts) if @trace
          parts.join(" ")
        end

        def append_trace_details(parts)
          parts << "#{@step_results.size} steps"
          parts << "#{@trace.total_latency_ms}ms" if @trace.total_latency_ms
          append_usage_details(parts)
          parts << "$#{format("%.6f", @trace.total_cost)}" if @trace.total_cost
          parts << "trace=#{@trace.trace_id&.slice(0, 8)}" if @trace.trace_id
        end

        def append_usage_details(parts)
          usage = @trace.total_usage
          return unless usage.is_a?(Hash)

          # Missing counts are shown as 0, matching Pipeline::Trace#to_s.
          parts << "#{usage[:input_tokens] || 0}+#{usage[:output_tokens] || 0} tokens"
        end

        # One to_s line for a step: alias and status padded to the table
        # column widths, followed by the step trace rendered with to_s.
        def step_line(step_record)
          step_result = step_record[:result]
          status = step_status(step_result)
          " #{step_record[:alias].to_s.ljust(COL1)} #{status.ljust(COL2)} #{step_result.trace}"
        end

        def step_status(step_result)
          step_result.ok? ? "ok" : step_result.status.to_s
        end

        # Hash outputs give one "key: value" line per entry, any other output a
        # single "value: ..." line. Lines longer than COL3 are cut and end in "...".
        def format_output(output)
          return ["(no output)"] unless output

          pairs = output.is_a?(Hash) ? output : { value: output }
          pairs.map do |key, val|
            str = val.is_a?(String) ? val : val.inspect
            line = "#{key}: #{str}"
            line.size > COL3 ? "#{line[0, COL3 - 3]}..." : line
          end
        end
      end
    end
  end
end
@@ -0,0 +1,139 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "securerandom"
4
+
5
module RubyLLM
  module Contract
    module Pipeline
      # Executes pipeline steps in declaration order, feeding each step's
      # parsed output to the next one, and stops at the first failed step or
      # limit violation.
      class Runner
        include Concerns::UsageAggregator

        # @raise [ArgumentError] when timeout_ms is given but not positive,
        #   or when +steps+ is empty
        def initialize(steps:, context:, timeout_ms: nil, token_budget: nil)
          if timeout_ms && timeout_ms <= 0
            raise ArgumentError, "timeout_ms must be positive (got #{timeout_ms})"
          end
          raise ArgumentError, "Pipeline has no steps defined" if steps.empty?

          @steps = steps
          @context = context
          @timeout_ms = timeout_ms
          @token_budget = token_budget
        end

        # Runs the pipeline on +input+ and returns the resulting Result.
        def call(input)
          state = ExecutionState.new(input)
          started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
          run_steps(state, started_at)
          finalize_result(state, started_at)
        end

        # Runs steps one by one; the limits are checked after each step that
        # did not fail.
        def run_steps(execution, start_time)
          @steps.each_with_index do |step_def, index|
            execute_step(step_def, execution)
            break if execution.failed? || check_limits(index, step_def, execution, start_time)
          end
        end

        private

        def execute_step(step_def, execution)
          outcome = step_def[:step_class].run(execution.current_input, context: build_step_context(step_def))
          execution.record_step(step_def[:alias], outcome)
        end

        # A per-step model, when declared, overrides the :model context entry.
        def build_step_context(step_def)
          override = step_def[:model]
          return @context unless override

          @context.merge(model: override)
        end

        # Returns true (after marking the execution as failed) when a limit
        # was hit, nil otherwise.
        def check_limits(index, step_def, execution, start_time)
          violation = detect_limit_violation(execution, start_time)
          return unless violation

          execution.mark_limit_failure(violation, next_step_alias(index, step_def))
          true
        end

        # NOTE: This is a cooperative timeout, not a hard deadline. The timeout is
        # checked between steps, after each step completes. A slow step (e.g. long
        # LLM call or multi-attempt retry) can exceed the deadline before the check
        # runs. This is a known architectural limitation -- safely interrupting a
        # running HTTP call in Ruby requires threads/fibers, which adds significant
        # complexity. For most pipelines this cooperative approach is sufficient;
        # set timeout_ms with enough headroom for your slowest expected step.
        def detect_limit_violation(execution, start_time)
          return :timeout if @timeout_ms && elapsed_ms(start_time) >= @timeout_ms

          :budget_exceeded if @token_budget && sum_tokens(execution.step_traces) > @token_budget
        end

        # Alias blamed for a limit failure: the step that would have run next,
        # or the current step when it was the last one.
        def next_step_alias(index, step_def)
          following = @steps[index + 1]
          following&.dig(:alias) || step_def[:alias]
        end

        def finalize_result(execution, start_time)
          traces = execution.step_traces
          pipeline_trace = Trace.new(
            trace_id: execution.trace_id,
            total_latency_ms: elapsed_ms(start_time),
            total_usage: aggregate_usage(traces),
            step_traces: traces
          )

          Result.new(
            status: execution.status,
            step_results: execution.step_results,
            outputs_by_step: execution.outputs_by_step,
            failed_step: execution.failed_step,
            trace: pipeline_trace
          )
        end

        # Whole milliseconds elapsed on the monotonic clock since +start_time+.
        def elapsed_ms(start_time)
          now = Process.clock_gettime(Process::CLOCK_MONOTONIC)
          ((now - start_time) * 1000).round
        end

        # Encapsulates mutable state during pipeline execution
        class ExecutionState
          attr_reader :trace_id, :step_results, :step_traces, :outputs_by_step,
                      :current_input, :status, :failed_step

          def initialize(input)
            @trace_id = SecureRandom.uuid
            @step_results = []
            @step_traces = []
            @outputs_by_step = {}
            @current_input = input
            @status = :ok
            @failed_step = nil
          end

          # Stores the step result and its trace. A successful step also
          # publishes its parsed output as the next input; a failed one sets
          # the failure status and alias.
          def record_step(step_alias, result)
            @step_results.push({ alias: step_alias, result: result })
            @step_traces.push(result.trace)

            if result.ok?
              parsed = result.parsed_output
              @outputs_by_step[step_alias] = parsed
              @current_input = parsed
            else
              @status = result.status
              @failed_step = step_alias
            end
          end

          def mark_limit_failure(status, failed_alias)
            @status = status
            @failed_step = failed_alias
          end

          def failed?
            @status != :ok
          end
        end
      end
    end
  end
end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Contract
    module Pipeline
      # Frozen summary of one pipeline run: trace id, total latency, total
      # usage, the per-step traces and the total cost derived from them.
      class Trace
        include Concerns::TraceEquality

        KNOWN_KEYS = %i[trace_id total_latency_ms total_usage step_traces total_cost].freeze

        attr_reader :trace_id, :total_latency_ms, :total_usage, :step_traces, :total_cost

        def initialize(trace_id: nil, total_latency_ms: nil, total_usage: nil, step_traces: nil)
          @trace_id = trace_id
          @total_latency_ms = total_latency_ms
          @total_usage = total_usage
          @step_traces = step_traces
          # Must come after @step_traces is set: the cost is computed from it.
          @total_cost = calculate_total_cost
          freeze
        end

        # Hash-style read access; keys outside KNOWN_KEYS yield nil.
        def [](key)
          public_send(key) if KNOWN_KEYS.include?(key.to_sym)
        end

        # Attributes as a Hash, with nil entries dropped.
        def to_h
          fields = {
            trace_id: @trace_id,
            total_latency_ms: @total_latency_ms,
            total_usage: @total_usage,
            step_traces: @step_traces,
            total_cost: @total_cost
          }
          fields.compact
        end

        def to_s
          build_summary_parts.join(" ")
        end

        private

        def build_summary_parts
          summary = ["trace=#{@trace_id&.slice(0, 8)}"]
          summary.push("#{@total_latency_ms}ms") if @total_latency_ms
          summary.push(format_token_usage) if @total_usage.is_a?(Hash)
          summary.push("$#{format("%.6f", @total_cost)}") if @total_cost
          summary.push("(#{step_count} steps)")
          summary
        end

        def format_token_usage
          input_tokens = @total_usage[:input_tokens] || 0
          output_tokens = @total_usage[:output_tokens] || 0
          "#{input_tokens}+#{output_tokens} tokens"
        end

        def step_count
          return 0 unless @step_traces.is_a?(Array)

          @step_traces.size
        end

        # Sum of the step costs rounded to 6 places; nil when there are no
        # step traces or none of them reports a cost.
        def calculate_total_cost
          return unless @step_traces.is_a?(Array)

          costs = collect_step_costs
          costs.sum.round(6) unless costs.empty?
        end

        def collect_step_costs
          @step_traces.filter_map { |step_trace| step_trace.cost if step_trace.respond_to?(:cost) }
        end
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "pipeline/trace"
4
+ require_relative "pipeline/result"
5
+ require_relative "pipeline/runner"
6
+ require_relative "pipeline/base"