ruby_llm-contract 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +55 -0
- data/CHANGELOG.md +76 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +176 -0
- data/LICENSE +21 -0
- data/README.md +154 -0
- data/Rakefile +8 -0
- data/examples/00_basics.rb +500 -0
- data/examples/01_classify_threads.rb +220 -0
- data/examples/02_generate_comment.rb +203 -0
- data/examples/03_target_audience.rb +201 -0
- data/examples/04_real_llm.rb +410 -0
- data/examples/05_output_schema.rb +258 -0
- data/examples/07_keyword_extraction.rb +239 -0
- data/examples/08_translation.rb +353 -0
- data/examples/09_eval_dataset.rb +287 -0
- data/examples/10_reddit_full_showcase.rb +363 -0
- data/examples/README.md +140 -0
- data/lib/ruby_llm/contract/adapters/base.rb +13 -0
- data/lib/ruby_llm/contract/adapters/response.rb +17 -0
- data/lib/ruby_llm/contract/adapters/ruby_llm.rb +94 -0
- data/lib/ruby_llm/contract/adapters/test.rb +44 -0
- data/lib/ruby_llm/contract/adapters.rb +6 -0
- data/lib/ruby_llm/contract/concerns/deep_symbolize.rb +17 -0
- data/lib/ruby_llm/contract/concerns/eval_host.rb +109 -0
- data/lib/ruby_llm/contract/concerns/trace_equality.rb +15 -0
- data/lib/ruby_llm/contract/concerns/usage_aggregator.rb +43 -0
- data/lib/ruby_llm/contract/configuration.rb +21 -0
- data/lib/ruby_llm/contract/contract/definition.rb +39 -0
- data/lib/ruby_llm/contract/contract/invariant.rb +23 -0
- data/lib/ruby_llm/contract/contract/parser.rb +143 -0
- data/lib/ruby_llm/contract/contract/schema_validator.rb +239 -0
- data/lib/ruby_llm/contract/contract/validator.rb +104 -0
- data/lib/ruby_llm/contract/contract.rb +7 -0
- data/lib/ruby_llm/contract/cost_calculator.rb +38 -0
- data/lib/ruby_llm/contract/dsl.rb +13 -0
- data/lib/ruby_llm/contract/errors.rb +19 -0
- data/lib/ruby_llm/contract/eval/case_result.rb +76 -0
- data/lib/ruby_llm/contract/eval/contract_detail_builder.rb +47 -0
- data/lib/ruby_llm/contract/eval/dataset.rb +53 -0
- data/lib/ruby_llm/contract/eval/eval_definition.rb +112 -0
- data/lib/ruby_llm/contract/eval/evaluation_result.rb +27 -0
- data/lib/ruby_llm/contract/eval/evaluator/exact.rb +20 -0
- data/lib/ruby_llm/contract/eval/evaluator/json_includes.rb +58 -0
- data/lib/ruby_llm/contract/eval/evaluator/proc_evaluator.rb +40 -0
- data/lib/ruby_llm/contract/eval/evaluator/regex.rb +27 -0
- data/lib/ruby_llm/contract/eval/model_comparison.rb +80 -0
- data/lib/ruby_llm/contract/eval/pipeline_result_adapter.rb +15 -0
- data/lib/ruby_llm/contract/eval/report.rb +115 -0
- data/lib/ruby_llm/contract/eval/runner.rb +162 -0
- data/lib/ruby_llm/contract/eval/trait_evaluator.rb +75 -0
- data/lib/ruby_llm/contract/eval.rb +16 -0
- data/lib/ruby_llm/contract/pipeline/base.rb +62 -0
- data/lib/ruby_llm/contract/pipeline/result.rb +131 -0
- data/lib/ruby_llm/contract/pipeline/runner.rb +139 -0
- data/lib/ruby_llm/contract/pipeline/trace.rb +72 -0
- data/lib/ruby_llm/contract/pipeline.rb +6 -0
- data/lib/ruby_llm/contract/prompt/ast.rb +38 -0
- data/lib/ruby_llm/contract/prompt/builder.rb +47 -0
- data/lib/ruby_llm/contract/prompt/node.rb +25 -0
- data/lib/ruby_llm/contract/prompt/nodes/example_node.rb +27 -0
- data/lib/ruby_llm/contract/prompt/nodes/rule_node.rb +15 -0
- data/lib/ruby_llm/contract/prompt/nodes/section_node.rb +26 -0
- data/lib/ruby_llm/contract/prompt/nodes/system_node.rb +15 -0
- data/lib/ruby_llm/contract/prompt/nodes/user_node.rb +15 -0
- data/lib/ruby_llm/contract/prompt/nodes.rb +7 -0
- data/lib/ruby_llm/contract/prompt/renderer.rb +76 -0
- data/lib/ruby_llm/contract/railtie.rb +20 -0
- data/lib/ruby_llm/contract/rake_task.rb +78 -0
- data/lib/ruby_llm/contract/rspec/pass_eval.rb +96 -0
- data/lib/ruby_llm/contract/rspec/satisfy_contract.rb +31 -0
- data/lib/ruby_llm/contract/rspec.rb +6 -0
- data/lib/ruby_llm/contract/step/base.rb +138 -0
- data/lib/ruby_llm/contract/step/dsl.rb +144 -0
- data/lib/ruby_llm/contract/step/limit_checker.rb +64 -0
- data/lib/ruby_llm/contract/step/result.rb +38 -0
- data/lib/ruby_llm/contract/step/retry_executor.rb +90 -0
- data/lib/ruby_llm/contract/step/retry_policy.rb +76 -0
- data/lib/ruby_llm/contract/step/runner.rb +126 -0
- data/lib/ruby_llm/contract/step/trace.rb +70 -0
- data/lib/ruby_llm/contract/step.rb +10 -0
- data/lib/ruby_llm/contract/token_estimator.rb +19 -0
- data/lib/ruby_llm/contract/types.rb +11 -0
- data/lib/ruby_llm/contract/version.rb +7 -0
- data/lib/ruby_llm/contract.rb +108 -0
- data/ruby_llm-contract.gemspec +33 -0
- metadata +172 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Eval
|
|
6
|
+
# Runs every case of an eval dataset through a step (or pipeline) and
# gathers the per-case outcomes into a Report.
class Runner
  include TraitEvaluator
  include ContractDetailBuilder

  # Builds a runner and executes it in one call.
  #
  # @param step [#run] invoked as `run(input, context:)` once per case
  # @param dataset [#cases, #name] source of the test cases
  # @param context [Hash] passed unchanged to every `step.run` call
  # @return [Report]
  def self.run(step:, dataset:, context: {})
    new(step: step, dataset: dataset, context: context).run
  end

  def initialize(step:, dataset:, context: {})
    @step = step
    @dataset = dataset
    @context = context
  end

  # Evaluates the dataset cases in order.
  #
  # @return [Report] report named after the dataset, with one result per case
  def run
    case_results = @dataset.cases.map { |test_case| evaluate_case(test_case) }
    Report.new(dataset_name: @dataset.name, results: case_results)
  end

  private

  # Runs a single case end to end. A RubyLLM::Contract::Error raised anywhere
  # in the process turns the case into a skipped result instead of aborting
  # the run (e.g. no adapter configured in offline mode without sample_response).
  def evaluate_case(test_case)
    step_result = normalize_result(@step.run(test_case.input, context: @context))
    eval_result = dispatch_evaluation(step_result, test_case)

    build_case_result(test_case, step_result, eval_result)
  rescue RubyLLM::Contract::Error => e
    skipped_result(test_case, e.message)
  end

  # Combines the test case, the step outcome and the evaluation verdict.
  def build_case_result(test_case, step_result, eval_result)
    trace = step_result.respond_to?(:trace) ? step_result.trace : nil
    CaseResult.new(
      name: test_case.name,
      input: test_case.input,
      output: step_result.parsed_output,
      expected: test_case.expected,
      step_status: step_result.status,
      score: eval_result.score,
      passed: eval_result.passed,
      label: eval_result.label,
      details: eval_result.details,
      duration_ms: extract_latency(trace),
      cost: extract_cost(trace)
    )
  end

  # Pipeline::Trace uses total_latency_ms, Step::Trace uses latency_ms
  def extract_latency(trace)
    trace_metric(trace, :total_latency_ms, :latency_ms)
  end

  # Pipeline::Trace uses total_cost, Step::Trace uses cost
  def extract_cost(trace)
    trace_metric(trace, :total_cost, :cost)
  end

  # Reads a metric from a trace: through the pipeline-level reader when the
  # trace responds to it, otherwise through `trace[step_key]`.
  # Returns nil when there is no trace at all.
  def trace_metric(trace, pipeline_reader, step_key)
    return nil unless trace
    return trace.public_send(pipeline_reader) if trace.respond_to?(pipeline_reader)

    trace[step_key]
  end

  # Chooses how to score a case. Priority: failed step -> custom evaluator ->
  # expected_traits -> expected value -> contract-only pass.
  def dispatch_evaluation(step_result, test_case)
    return contract_failure(step_result) unless step_result.ok?

    if test_case.evaluator
      evaluate_with_custom(step_result, test_case)
    elsif test_case.expected_traits
      evaluate_traits(step_result, test_case)
    elsif !test_case.expected.nil?
      # nil check rather than truthiness: an expected value of `false` must
      # still be compared instead of silently passing as contract-only.
      evaluate_expected(step_result, test_case)
    else
      evaluate_contract_only
    end
  end

  # Step results already expose parsed_output; anything else is treated as a
  # pipeline result and wrapped in an adapter with the same interface.
  def normalize_result(result)
    return result if result.respond_to?(:parsed_output)

    normalize_pipeline_result(result)
  end

  def normalize_pipeline_result(result)
    last_result = result.step_results&.last&.dig(:result)
    is_ok = result.ok?
    pipeline_trace = result.respond_to?(:trace) ? result.trace : nil

    PipelineResultAdapter.new(
      status: result.status,
      ok_flag: is_ok,
      parsed_output: is_ok ? result.outputs_by_step.values.last : nil,
      validation_errors: last_result.respond_to?(:validation_errors) ? last_result.validation_errors : [],
      trace: pipeline_trace || (last_result.respond_to?(:trace) ? last_result.trace : {})
    )
  end

  def evaluate_expected(step_result, test_case)
    dispatch_expected_evaluator(
      output: step_result.parsed_output,
      expected: test_case.expected,
      input: test_case.input
    )
  end

  # Hash -> JsonIncludes, Regexp -> Regex, anything else -> Exact.
  def dispatch_expected_evaluator(output:, expected:, input:)
    if expected.is_a?(Hash)
      Evaluator::JsonIncludes.new.call(output: output, expected: expected, input: input)
    elsif expected.is_a?(::Regexp)
      Evaluator::Regex.new(expected).call(output: output, input: input)
    else
      Evaluator::Exact.new.call(output: output, expected: expected, input: input)
    end
  end

  # Procs are wrapped in a ProcEvaluator; other evaluators are called directly.
  def evaluate_with_custom(step_result, test_case)
    evaluator = test_case.evaluator
    evaluator = Evaluator::ProcEvaluator.new(evaluator) if evaluator.is_a?(::Proc)
    evaluator.call(output: step_result.parsed_output, expected: test_case.expected, input: test_case.input)
  end

  def evaluate_contract_only
    EvaluationResult.new(score: 1.0, passed: true, details: build_contract_details)
  end

  # Zero-score result for a step that did not finish with an ok status.
  # Validation errors are appended only when present, so a failure without
  # any (or with nil errors) does not end in a dangling separator or raise.
  def contract_failure(step_result)
    details = "step failed: #{step_result.status}"
    errors = Array(step_result.validation_errors)
    details += " — #{errors.join(", ")}" unless errors.empty?

    EvaluationResult.new(score: 0.0, passed: false, details: details)
  end

  def skipped_result(test_case, reason)
    CaseResult.new(
      name: test_case.name,
      input: test_case.input,
      output: nil,
      expected: test_case.expected,
      step_status: :skipped,
      score: 0.0,
      passed: false,
      label: "SKIP",
      details: "skipped: #{reason}"
    )
  end
end
|
|
160
|
+
end
|
|
161
|
+
end
|
|
162
|
+
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Eval
|
|
6
|
+
# Mixin split out of Runner to keep that class short.
# Scores a step's parsed output against a test case's expected_traits.
module TraitEvaluator
  private

  # Checks every expected trait and folds the failures into an EvaluationResult.
  def evaluate_traits(step_result, test_case)
    parsed = step_result.parsed_output
    expectations = test_case.expected_traits
    failures = []
    expectations.each { |key, expectation| check_trait(parsed, key, expectation, failures) }

    build_trait_result(failures, expectations.length)
  end

  # Appends an error message to +errors+ when the trait is not satisfied.
  # Non-Hash output is treated as having no value for any key.
  def check_trait(output, key, expectation, errors)
    actual = output[key] if output.is_a?(Hash)
    message = trait_error(key, actual, expectation)
    errors << message if message
  end

  # Returns an error message for a violated trait, or nil when it holds.
  # The kind of check is chosen by the type of the expectation.
  def trait_error(key, value, expectation)
    case expectation
    when ::Regexp then trait_regexp_error(key, value, expectation)
    when Range then trait_range_error(key, value, expectation)
    when true then trait_truthy_error(key, value)
    when false then trait_falsy_error(key, value)
    else trait_equality_error(key, value, expectation)
    end
  end

  # The value is stringified before matching.
  def trait_regexp_error(key, value, expectation)
    return if value.to_s.match?(expectation)

    "#{key}: does not match #{expectation.inspect}"
  end

  # Numerics are compared directly; anything else by the length of its to_s.
  def trait_range_error(key, value, expectation)
    comparable = value.is_a?(Numeric) ? value : value.to_s.length
    return if expectation.include?(comparable)

    "#{key}: #{value.inspect} not in #{expectation}"
  end

  def trait_truthy_error(key, value)
    return if value

    "#{key}: expected truthy, got #{value.inspect}"
  end

  def trait_falsy_error(key, value)
    return unless value

    "#{key}: expected falsy, got #{value.inspect}"
  end

  def trait_equality_error(key, value, expectation)
    return if value == expectation

    "#{key}: expected #{expectation.inspect}, got #{value.inspect}"
  end

  # Full score when nothing failed; otherwise the fraction of matched traits,
  # with the error messages joined into the details.
  def build_trait_result(errors, trait_count)
    return EvaluationResult.new(score: 1.0, passed: true, details: "all traits match") if errors.empty?

    matched = trait_count - errors.length
    score = trait_count.zero? ? 0.0 : matched.to_f / trait_count
    EvaluationResult.new(score: score, passed: false, details: errors.join("; "))
  end
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "eval/evaluation_result"
|
|
4
|
+
require_relative "eval/case_result"
|
|
5
|
+
require_relative "eval/evaluator/exact"
|
|
6
|
+
require_relative "eval/evaluator/regex"
|
|
7
|
+
require_relative "eval/evaluator/json_includes"
|
|
8
|
+
require_relative "eval/evaluator/proc_evaluator"
|
|
9
|
+
require_relative "eval/dataset"
|
|
10
|
+
require_relative "eval/pipeline_result_adapter"
|
|
11
|
+
require_relative "eval/trait_evaluator"
|
|
12
|
+
require_relative "eval/contract_detail_builder"
|
|
13
|
+
require_relative "eval/runner"
|
|
14
|
+
require_relative "eval/report"
|
|
15
|
+
require_relative "eval/eval_definition"
|
|
16
|
+
require_relative "eval/model_comparison"
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Pipeline
|
|
6
|
+
# Base class for pipelines declared through a class-level DSL
# (`step`, `token_budget`) and executed with `run` / `test`.
class Base
  # Registers the new subclass as an eval host when the class being
  # inherited from already reports evals (the check runs on the receiver of
  # this hook, i.e. the parent class, not on +subclass+).
  def self.inherited(subclass)
    super
    return unless respond_to?(:eval_defined?) && eval_defined?

    Contract.register_eval_host(subclass)
  end

  class << self
    include Concerns::EvalHost

    # Declares a step. +depends_on+ is validated against already-declared
    # aliases but is otherwise only stored: it is accepted for forward
    # compatibility with DAG pipelines (v0.3), and execution currently
    # stays linear in declaration order.
    def step(step_class, as:, depends_on: nil, model: nil)
      validate_dependency!(depends_on) if depends_on

      entry = { step_class: step_class, alias: as, depends_on: depends_on, model: model }
      steps_registry << entry
    end

    # Frozen snapshot of the declared steps.
    def steps
      snapshot = steps_registry.dup
      snapshot.freeze
    end

    # Mutable backing list used while steps are being registered.
    def steps_registry
      @steps_registry ||= []
    end

    # Combined getter/setter: stores +limit+ when given, then returns the
    # current budget.
    def token_budget(limit = nil)
      @token_budget = limit if limit
      @token_budget
    end

    # Executes the declared steps against +input+.
    def run(input, context: {}, timeout_ms: nil)
      runner = Runner.new(steps: steps, context: context, timeout_ms: timeout_ms, token_budget: token_budget)
      runner.call(input)
    end

    # Runs the pipeline against canned responses keyed by step alias; steps
    # without an entry receive an empty string.
    def test(input, responses: {}, timeout_ms: nil)
      canned = steps.map { |step_entry| responses.fetch(step_entry[:alias], "") }
      test_adapter = Adapters::Test.new(responses: canned)

      run(input, context: { adapter: test_adapter }, timeout_ms: timeout_ms)
    end

    private

    def known_step_aliases
      steps_registry.map { |step_entry| step_entry[:alias] }
    end

    # Raises ArgumentError unless +dep+ names an already-declared step.
    def validate_dependency!(dep)
      aliases = known_step_aliases
      return if aliases.include?(dep)

      raise ArgumentError, "Unknown dependency: #{dep.inspect}. Known steps: #{known_step_aliases.inspect}"
    end
  end
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Pipeline
|
|
6
|
+
class Result
|
|
7
|
+
attr_reader :status, :step_results, :outputs_by_step, :failed_step, :trace
|
|
8
|
+
|
|
9
|
+
# Column widths for pretty_print table
|
|
10
|
+
COL1 = 14 # step name
|
|
11
|
+
COL2 = 10 # status
|
|
12
|
+
COL3 = 50 # output
|
|
13
|
+
|
|
14
|
+
TOP_BORDER = "+#{"-" * (COL1 + COL2 + COL3 + 8)}+".freeze
|
|
15
|
+
MID_BORDER = "+-#{"-" * COL1}-+-#{"-" * COL2}-+-#{"-" * COL3}-+".freeze
|
|
16
|
+
|
|
17
|
+
def initialize(status:, step_results:, outputs_by_step:, failed_step: nil, trace: Trace.new)
|
|
18
|
+
@status = status
|
|
19
|
+
@step_results = step_results.each(&:freeze).freeze
|
|
20
|
+
@outputs_by_step = outputs_by_step.freeze
|
|
21
|
+
@failed_step = failed_step
|
|
22
|
+
@trace = trace
|
|
23
|
+
freeze
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def ok?
|
|
27
|
+
@status == :ok
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def failed?
|
|
31
|
+
@status != :ok
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def to_s
|
|
35
|
+
lines = [header_line]
|
|
36
|
+
@step_results.each { |sr| lines << step_line(sr) }
|
|
37
|
+
lines.join("\n")
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def pretty_print(io = $stdout)
|
|
41
|
+
build_table.each { |line| io.puts line }
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
private
|
|
45
|
+
|
|
46
|
+
def build_table
|
|
47
|
+
header_width = COL1 + COL2 + COL3 + 2
|
|
48
|
+
[TOP_BORDER,
|
|
49
|
+
"| #{header_line.ljust(header_width)} |",
|
|
50
|
+
MID_BORDER,
|
|
51
|
+
"| #{"Step".ljust(COL1)} | #{"Status".ljust(COL2)} | #{"Output".ljust(COL3)} |",
|
|
52
|
+
MID_BORDER,
|
|
53
|
+
*build_step_rows,
|
|
54
|
+
TOP_BORDER]
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
def build_step_rows
|
|
58
|
+
rows = []
|
|
59
|
+
@step_results.each_with_index do |sr, idx|
|
|
60
|
+
rows.concat(build_single_step_rows(sr))
|
|
61
|
+
rows << MID_BORDER if idx < @step_results.size - 1
|
|
62
|
+
end
|
|
63
|
+
rows
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def build_single_step_rows(step_record)
|
|
67
|
+
step_alias = step_record[:alias].to_s
|
|
68
|
+
status_str = step_status(step_record[:result])
|
|
69
|
+
output_lines = format_output(@outputs_by_step[step_record[:alias]])
|
|
70
|
+
first_row = build_first_step_row(step_alias, status_str, output_lines.first || "")
|
|
71
|
+
continuation_rows = build_continuation_rows(output_lines.drop(1))
|
|
72
|
+
|
|
73
|
+
[first_row, *continuation_rows]
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def build_first_step_row(step_alias, status_str, first_line)
|
|
77
|
+
"| #{step_alias.ljust(COL1)} | #{status_str.ljust(COL2)} | #{first_line.ljust(COL3)} |"
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def build_continuation_rows(lines)
|
|
81
|
+
blank_prefix = "| #{" " * COL1} | #{" " * COL2} | "
|
|
82
|
+
lines.map { |line| "#{blank_prefix}#{line.ljust(COL3)} |" }
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def header_line
|
|
86
|
+
parts = ["Pipeline: #{@status}"]
|
|
87
|
+
append_trace_details(parts) if @trace
|
|
88
|
+
parts.join(" ")
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
def append_trace_details(parts)
|
|
92
|
+
parts << "#{@step_results.size} steps"
|
|
93
|
+
parts << "#{@trace.total_latency_ms}ms" if @trace.total_latency_ms
|
|
94
|
+
append_usage_details(parts)
|
|
95
|
+
parts << "$#{format("%.6f", @trace.total_cost)}" if @trace.total_cost
|
|
96
|
+
parts << "trace=#{@trace.trace_id&.slice(0, 8)}" if @trace.trace_id
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
def append_usage_details(parts)
|
|
100
|
+
usage = @trace.total_usage
|
|
101
|
+
return unless usage.is_a?(Hash)
|
|
102
|
+
|
|
103
|
+
parts << "#{usage[:input_tokens]}+#{usage[:output_tokens]} tokens"
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def step_line(step_record)
|
|
107
|
+
step_result = step_record[:result]
|
|
108
|
+
trace = step_result.trace
|
|
109
|
+
status = step_status(step_result)
|
|
110
|
+
trace_str = trace.respond_to?(:to_s) ? trace.to_s : ""
|
|
111
|
+
" #{step_record[:alias].to_s.ljust(14)} #{status.ljust(10)} #{trace_str}"
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
def step_status(step_result)
|
|
115
|
+
step_result.ok? ? "ok" : step_result.status.to_s
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def format_output(output)
|
|
119
|
+
return ["(no output)"] unless output
|
|
120
|
+
|
|
121
|
+
pairs = output.is_a?(Hash) ? output : { value: output }
|
|
122
|
+
pairs.map do |key, val|
|
|
123
|
+
str = val.is_a?(String) ? val : val.inspect
|
|
124
|
+
line = "#{key}: #{str}"
|
|
125
|
+
line.size > COL3 ? "#{line[0, COL3 - 3]}..." : line
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
end
|
|
131
|
+
end
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "securerandom"
|
|
4
|
+
|
|
5
|
+
module RubyLLM
|
|
6
|
+
module Contract
|
|
7
|
+
module Pipeline
|
|
8
|
+
# Executes a pipeline's steps one after another, feeding each step's parsed
# output into the next, and wraps the outcome in a Pipeline::Result.
class Runner
  include Concerns::UsageAggregator

  # @param steps [Array<Hash>] step definitions with :step_class, :alias, :model
  # @param context [Hash] passed to every step run
  # @param timeout_ms [Numeric, nil] cooperative time limit for the whole run
  # @param token_budget [Numeric, nil] upper bound on tokens across steps
  # @raise [ArgumentError] on a non-positive timeout or an empty step list
  def initialize(steps:, context:, timeout_ms: nil, token_budget: nil)
    if timeout_ms && timeout_ms <= 0
      raise ArgumentError, "timeout_ms must be positive (got #{timeout_ms})"
    end
    raise ArgumentError, "Pipeline has no steps defined" if steps.empty?

    @steps = steps
    @context = context
    @timeout_ms = timeout_ms
    @token_budget = token_budget
  end

  # Runs the pipeline for +input+ and returns the resulting Result.
  def call(input)
    state = ExecutionState.new(input)
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    run_steps(state, started_at)
    finalize_result(state, started_at)
  end

  # Executes steps in order, stopping after the first failed step or the
  # first limit violation.
  def run_steps(execution, start_time)
    @steps.each_with_index do |step_def, index|
      execute_step(step_def, execution)
      break if execution.failed? || check_limits(index, step_def, execution, start_time)
    end
  end

  private

  def execute_step(step_def, execution)
    outcome = step_def[:step_class].run(execution.current_input, context: build_step_context(step_def))

    execution.record_step(step_def[:alias], outcome)
  end

  # A per-step model, when declared, overrides the one in the shared context.
  def build_step_context(step_def)
    override = step_def[:model]
    return @context unless override

    @context.merge(model: override)
  end

  # Marks the execution as failed and returns true when a limit was hit;
  # returns nil otherwise.
  def check_limits(index, step_def, execution, start_time)
    violation = detect_limit_violation(execution, start_time)
    return unless violation

    execution.mark_limit_failure(violation, next_step_alias(index, step_def))
    true
  end

  # NOTE: The timeout is cooperative, not a hard deadline. Limits are only
  # checked between steps, once a step has finished, so a slow step (a long
  # LLM call, a multi-attempt retry) can overrun the deadline before the
  # check runs. This is a known architectural limitation: safely
  # interrupting an in-flight HTTP call in Ruby needs threads/fibers, which
  # adds significant complexity. Choose timeout_ms with enough headroom for
  # the slowest expected step.
  def detect_limit_violation(execution, start_time)
    return :timeout if @timeout_ms && elapsed_ms(start_time) >= @timeout_ms
    return :budget_exceeded if @token_budget && sum_tokens(execution.step_traces) > @token_budget

    nil
  end

  # Alias blamed for a limit failure: the step that would have run next,
  # or the current one when it was the last.
  def next_step_alias(index, step_def)
    upcoming = @steps[index + 1]
    upcoming&.dig(:alias) || step_def[:alias]
  end

  def finalize_result(execution, start_time)
    step_traces = execution.step_traces
    pipeline_trace = Trace.new(
      trace_id: execution.trace_id,
      total_latency_ms: elapsed_ms(start_time),
      total_usage: aggregate_usage(step_traces),
      step_traces: step_traces
    )

    Result.new(
      status: execution.status,
      step_results: execution.step_results,
      outputs_by_step: execution.outputs_by_step,
      failed_step: execution.failed_step,
      trace: pipeline_trace
    )
  end

  # Milliseconds elapsed since +start_time+ on the monotonic clock.
  def elapsed_ms(start_time)
    seconds = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
    (seconds * 1000).round
  end

  # Holds the mutable state accumulated while a pipeline executes.
  class ExecutionState
    attr_reader :trace_id, :step_results, :step_traces, :outputs_by_step,
                :current_input, :status, :failed_step

    def initialize(input)
      @trace_id = SecureRandom.uuid
      @current_input = input
      @status = :ok
      @failed_step = nil
      @step_results = []
      @step_traces = []
      @outputs_by_step = {}
    end

    # Stores the step's result and trace. On success the parsed output
    # becomes the next step's input; on failure the step's status and alias
    # are recorded as the pipeline failure.
    def record_step(step_alias, result)
      @step_results.push({ alias: step_alias, result: result })
      @step_traces.push(result.trace)

      if result.ok?
        @outputs_by_step[step_alias] = @current_input = result.parsed_output
      else
        @status = result.status
        @failed_step = step_alias
      end
    end

    def mark_limit_failure(status, failed_alias)
      @status = status
      @failed_step = failed_alias
    end

    def failed?
      @status != :ok
    end
  end
end
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
end
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Contract
|
|
5
|
+
module Pipeline
|
|
6
|
+
# Frozen pipeline-level trace: trace id, total latency and usage, the
# individual step traces and the total cost derived from them.
class Trace
  include Concerns::TraceEquality

  attr_reader :trace_id, :total_latency_ms, :total_usage, :step_traces, :total_cost

  # total_cost is not passed in; it is computed from +step_traces+.
  def initialize(trace_id: nil, total_latency_ms: nil, total_usage: nil, step_traces: nil)
    @trace_id = trace_id
    @total_latency_ms = total_latency_ms
    @total_usage = total_usage
    @step_traces = step_traces
    @total_cost = calculate_total_cost
    freeze
  end

  KNOWN_KEYS = %i[trace_id total_latency_ms total_usage step_traces total_cost].freeze

  # Hash-style read access limited to KNOWN_KEYS.
  #
  # @param key [Symbol, String] attribute name
  # @return [Object, nil] the attribute value; nil for unknown keys,
  #   including keys that cannot be converted to a Symbol (e.g. an Integer),
  #   which previously raised NoMethodError
  def [](key)
    return nil unless key.respond_to?(:to_sym) && KNOWN_KEYS.include?(key.to_sym)

    public_send(key)
  end

  # Attributes as a Hash, with nil entries removed.
  def to_h
    {
      trace_id: @trace_id,
      total_latency_ms: @total_latency_ms,
      total_usage: @total_usage,
      step_traces: @step_traces,
      total_cost: @total_cost
    }.compact
  end

  # One-line summary of the trace.
  def to_s
    build_summary_parts.join(" ")
  end

  private

  # Trace id prefix and step count are always present; latency, token usage
  # and cost are added only when available.
  def build_summary_parts
    summary = ["trace=#{@trace_id&.slice(0, 8)}"]
    summary << "#{@total_latency_ms}ms" if @total_latency_ms
    summary << format_token_usage if @total_usage.is_a?(Hash)
    summary << "$#{format("%.6f", @total_cost)}" if @total_cost
    summary << "(#{step_count} steps)"
  end

  # Missing token counts are rendered as 0.
  def format_token_usage
    input_tokens = @total_usage[:input_tokens] || 0
    output_tokens = @total_usage[:output_tokens] || 0
    "#{input_tokens}+#{output_tokens} tokens"
  end

  def step_count
    @step_traces.is_a?(Array) ? @step_traces.size : 0
  end

  # Sum of the step costs rounded to 6 decimals; nil when step_traces is not
  # an Array or no step reports a cost.
  def calculate_total_cost
    return nil unless @step_traces.is_a?(Array)

    costs = collect_step_costs
    costs.empty? ? nil : costs.sum.round(6)
  end

  # Costs of the step traces that expose a non-nil #cost.
  def collect_step_costs
    @step_traces.filter_map { |step_trace| step_trace.cost if step_trace.respond_to?(:cost) }
  end
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
end
|