RubyGems - ruby_llm-contract - Versions diffs - 0.4.5 → 0.5.0 - Mend

ruby_llm-contract 0.4.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56)

checksums.yaml +4 -4
data/.rubycritic.yml +8 -0
data/.simplecov +22 -0
data/CHANGELOG.md +19 -0
data/Gemfile +2 -0
data/Gemfile.lock +104 -2
data/README.md +42 -2
data/lib/ruby_llm/contract/concerns/context_helpers.rb +11 -10
data/lib/ruby_llm/contract/concerns/deep_freeze.rb +13 -7
data/lib/ruby_llm/contract/concerns/deep_symbolize.rb +15 -5
data/lib/ruby_llm/contract/concerns/eval_host.rb +51 -7
data/lib/ruby_llm/contract/contract/schema_validator/bound_rule.rb +85 -0
data/lib/ruby_llm/contract/contract/schema_validator/enum_rule.rb +23 -0
data/lib/ruby_llm/contract/contract/schema_validator/node.rb +70 -0
data/lib/ruby_llm/contract/contract/schema_validator/object_rules.rb +66 -0
data/lib/ruby_llm/contract/contract/schema_validator/scalar_rules.rb +22 -0
data/lib/ruby_llm/contract/contract/schema_validator/schema_extractor.rb +23 -0
data/lib/ruby_llm/contract/contract/schema_validator/type_rule.rb +30 -0
data/lib/ruby_llm/contract/contract/schema_validator.rb +41 -266
data/lib/ruby_llm/contract/contract/validator.rb +9 -0
data/lib/ruby_llm/contract/eval/case_executor.rb +52 -0
data/lib/ruby_llm/contract/eval/case_result_builder.rb +35 -0
data/lib/ruby_llm/contract/eval/case_scorer.rb +66 -0
data/lib/ruby_llm/contract/eval/evaluator/exact.rb +8 -6
data/lib/ruby_llm/contract/eval/evaluator/proc_evaluator.rb +22 -10
data/lib/ruby_llm/contract/eval/evaluator/regex.rb +11 -8
data/lib/ruby_llm/contract/eval/expectation_evaluator.rb +26 -0
data/lib/ruby_llm/contract/eval/prompt_diff.rb +39 -0
data/lib/ruby_llm/contract/eval/prompt_diff_comparator.rb +116 -0
data/lib/ruby_llm/contract/eval/prompt_diff_presenter.rb +99 -0
data/lib/ruby_llm/contract/eval/prompt_diff_serializer.rb +23 -0
data/lib/ruby_llm/contract/eval/report.rb +19 -191
data/lib/ruby_llm/contract/eval/report_presenter.rb +65 -0
data/lib/ruby_llm/contract/eval/report_stats.rb +65 -0
data/lib/ruby_llm/contract/eval/report_storage.rb +107 -0
data/lib/ruby_llm/contract/eval/runner.rb +30 -207
data/lib/ruby_llm/contract/eval/step_expectation_applier.rb +67 -0
data/lib/ruby_llm/contract/eval/step_result_normalizer.rb +39 -0
data/lib/ruby_llm/contract/eval.rb +13 -0
data/lib/ruby_llm/contract/pipeline/base.rb +10 -1
data/lib/ruby_llm/contract/rspec/pass_eval.rb +84 -3
data/lib/ruby_llm/contract/rspec.rb +5 -0
data/lib/ruby_llm/contract/step/adapter_caller.rb +23 -0
data/lib/ruby_llm/contract/step/base.rb +93 -38
data/lib/ruby_llm/contract/step/dsl.rb +10 -0
data/lib/ruby_llm/contract/step/input_validator.rb +34 -0
data/lib/ruby_llm/contract/step/limit_checker.rb +11 -11
data/lib/ruby_llm/contract/step/prompt_compiler.rb +33 -0
data/lib/ruby_llm/contract/step/result.rb +3 -2
data/lib/ruby_llm/contract/step/result_builder.rb +60 -0
data/lib/ruby_llm/contract/step/retry_executor.rb +1 -0
data/lib/ruby_llm/contract/step/runner.rb +46 -85
data/lib/ruby_llm/contract/step/runner_config.rb +37 -0
data/lib/ruby_llm/contract/step.rb +5 -0
data/lib/ruby_llm/contract/version.rb +1 -1
metadata +28 -1

data/lib/ruby_llm/contract/contract/schema_validator/node.rb ADDED Viewed

@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+module RubyLLM
+  module Contract
+    class SchemaValidator
+      # Immutable validation context (a Data value object) for one schema node: the value, its schema fragment, and its current path.
+      class Node < Data.define(:value, :schema, :path)
+        def expected_type # Declared :type of the schema as a String; nil when the schema has no :type.
+          schema[:type]&.to_s
+        end
+        def object_schema? # True when the schema declares type "object" or carries a :properties key.
+          expected_type == "object" || schema.key?(:properties)
+        end
+        def hash? # True when the value under validation is a Hash.
+          value.is_a?(Hash)
+        end
+        def array? # True when the value under validation is an Array.
+          value.is_a?(Array)
+        end
+        def numeric? # True when the value under validation is a Numeric.
+          value.is_a?(Numeric)
+        end
+        def properties # Declared :properties of the schema, defaulting to an empty Hash.
+          schema[:properties] || {}
+        end
+        def required_fields # :required entries normalized to an Array of Strings (empty when :required is absent).
+          Array(schema[:required]).map(&:to_s)
+        end
+        def items_schema # Raw :items entry of the schema; nil when absent.
+          schema[:items]
+        end
+        def key_present?(field) # True when value holds the key in Symbol or String form; value must respond to key?.
+          symbolized = field.to_sym
+          value.key?(symbolized) || value.key?(field.to_s)
+        end
+        def field_value(field) # Reads the Symbol key when present, otherwise falls back to the String key.
+          symbolized = field.to_sym
+          return value[symbolized] if value.key?(symbolized)
+          value[field.to_s]
+        end
+        def extra_keys # NOTE: returns every key of value as a String; filtering against declared properties is left to the caller.
+          value.keys.map(&:to_s)
+        end
+        def qualify(field) # Dotted path for a field: "<path>.<field>", or just the field name when path is nil.
+          path ? "#{path}.#{field}" : field.to_s
+        end
+        def child(field, child_value, child_schema) # New node for a property value; its path is the qualified field name.
+          self.class.new(value: child_value, schema: child_schema, path: qualify(field))
+        end
+        def array_item(index, item, item_schema) # New node for an array element; its path is "<path>[<index>]".
+          self.class.new(value: item, schema: item_schema, path: "#{path}[#{index}]")
+        end
+      end
+    end
+  end
+end

data/lib/ruby_llm/contract/contract/schema_validator/object_rules.rb ADDED Viewed

@@ -0,0 +1,66 @@
+# frozen_string_literal: true
+module RubyLLM
+  module Contract
+    class SchemaValidator
+      # Applies object-only validation rules (required fields, declared properties, additionalProperties) to a schema node.
+      class ObjectRules
+        def initialize(errors) # errors: collection that violation messages are appended to with <<.
+          @errors = errors
+        end
+        def validate(node) # Runs every object rule; yields a child Node for each declared property that is present and non-nil.
+          validate_required_fields(node)
+          validate_properties(node) { |child| yield child } # forward each child node to the caller's block
+          validate_additional_properties(node)
+        end
+        private
+        def validate_required_fields(node) # Reports each required field whose key is absent from the value.
+          node.required_fields.each do |field|
+            next if node.key_present?(field)
+            @errors << "missing required field: #{node.qualify(field)}"
+          end
+        end
+        def validate_properties(node) # Walks declared properties; nil values go to validate_nil_field, others are yielded as child nodes.
+          required = node.required_fields
+          node.properties.each do |field, field_schema|
+            next unless node.key_present?(field) # absent keys are reported by validate_required_fields, not here
+            value = node.field_value(field)
+            qualified = node.qualify(field)
+            if value.nil?
+              validate_nil_field(qualified, field_schema, required.include?(field.to_s))
+              next
+            end
+            yield node.child(field, value, field_schema)
+          end
+        end
+        def validate_nil_field(path, field_schema, required) # A nil value is an error only when the field is required.
+          return unless required
+          expected_type = field_schema[:type] || "non-null"
+          @errors << "#{path}: expected #{expected_type}, got nil"
+        end
+        def validate_additional_properties(node) # Enforced only when the schema sets additionalProperties to exactly false.
+          return unless node.schema[:additionalProperties] == false
+          allowed_keys = node.properties.keys.map(&:to_s)
+          extra_keys = node.extra_keys.reject { |key| allowed_keys.include?(key) } # keys compared in String form
+          extra_keys.each do |extra_key|
+            @errors << "#{node.qualify(extra_key)}: additional property not allowed"
+          end
+        end
+      end
+    end
+  end
+end

data/lib/ruby_llm/contract/contract/schema_validator/scalar_rules.rb ADDED Viewed

@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+module RubyLLM
+  module Contract
+    class SchemaValidator
+      # Applies scalar-only validation rules to a schema node: TypeRule, then EnumRule, then BoundRule.
+      class ScalarRules
+        def initialize(errors) # Every rule is constructed with the same errors collection.
+          @rules = [
+            TypeRule.new(errors),
+            EnumRule.new(errors),
+            BoundRule.new(errors)
+          ]
+        end
+        def validate(node) # Runs each rule against the node in the order listed above.
+          @rules.each { |rule| rule.validate(node) }
+        end
+      end
+    end
+  end
+end

data/lib/ruby_llm/contract/contract/schema_validator/schema_extractor.rb ADDED Viewed

@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+module RubyLLM
+  module Contract
+    class SchemaValidator
+      class SchemaExtractor # Turns a schema class, schema object, or raw value into the structure passed through deep_symbolize.
+        include Concerns::DeepSymbolize
+        def call(schema) # schema: a Class (instantiated with no arguments), an object answering to_json_schema, or any other value.
+          schema_payload = schema.is_a?(Class) ? schema.new : schema
+          raw_schema = if schema_payload.respond_to?(:to_json_schema)
+                         json_schema = schema_payload.to_json_schema
+                         json_schema[:schema] || json_schema["schema"] || json_schema # unwrap a :schema / "schema" envelope when present
+                       else
+                         schema # note: the original argument is used here, not schema_payload
+                       end
+          deep_symbolize(raw_schema)
+        end
+      end
+    end
+  end
+end

data/lib/ruby_llm/contract/contract/schema_validator/type_rule.rb ADDED Viewed

@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+module RubyLLM
+  module Contract
+    class SchemaValidator
+      # Validates the declared JSON schema type for a node, using the predicates in SchemaValidator::TYPE_CHECKS.
+      class TypeRule
+        def initialize(errors) # errors: collection that violation messages are appended to with <<.
+          @errors = errors
+        end
+        def validate(node) # Appends "<path>: expected <type>, got <class>" when the value fails the declared type's check.
+          expected_type = node.expected_type
+          value = node.value
+          return unless expected_type # no declared type: nothing to check
+          return if type_valid?(expected_type, value)
+          @errors << "#{node.path}: expected #{expected_type}, got #{value.class}"
+        end
+        private
+        def type_valid?(expected_type, value) # Type names without an entry in TYPE_CHECKS are treated as valid.
+          checker = SchemaValidator::TYPE_CHECKS[expected_type]
+          checker ? checker.call(value) : true
+        end
+      end
+    end
+  end
+end

data/lib/ruby_llm/contract/contract/schema_validator.rb CHANGED Viewed

@@ -1,305 +1,80 @@
 # frozen_string_literal: true
+require_relative "schema_validator/node"
+require_relative "schema_validator/schema_extractor"
+require_relative "schema_validator/type_rule"
+require_relative "schema_validator/enum_rule"
+require_relative "schema_validator/bound_rule"
+require_relative "schema_validator/scalar_rules"
+require_relative "schema_validator/object_rules"
 module RubyLLM
   module Contract
     # Client-side validation of parsed output against an output_schema.
     # Checks required fields, enum constraints, number ranges, and nested objects.
     # This complements provider-side enforcement (with_schema) and catches
     # violations when using Test adapter or providers that ignore schemas.
-    class SchemaValidator # rubocop:disable Metrics/ClassLength
-      include Concerns::DeepSymbolize
-      # Bundles field path, value, and constraints to reduce parameter passing
-      FieldCheck = Struct.new(:qualified, :value, :constraints)
+    class SchemaValidator
       SIZE_BOUNDS = {
         string: { min_key: :minLength, max_key: :maxLength, metric: "length" },
         array: { min_key: :minItems, max_key: :maxItems, metric: "array length" }
       }.freeze
+      TYPE_CHECKS = { # Maps a JSON schema type name (String) to a lambda that tests a Ruby value against that type.
+        "string" => ->(value) { value.is_a?(String) },
+        "integer" => ->(value) { value.is_a?(Integer) },
+        "number" => ->(value) { value.is_a?(Numeric) }, # any Numeric, so Integer values also satisfy "number"
+        "boolean" => ->(value) { value.is_a?(TrueClass) || value.is_a?(FalseClass) },
+        "array" => ->(value) { value.is_a?(Array) },
+        "object" => ->(value) { value.is_a?(Hash) }
+      }.freeze
       def self.validate(parsed_output, schema)
         new(parsed_output, schema).validate
       end
       def initialize(parsed_output, schema)
-        @output = parsed_output
-        @json_schema = extract_schema(schema)
         @errors = []
+        json_schema = SchemaExtractor.new.call(schema)
+        path = root_object_schema?(json_schema) ? nil : "root"
+        @root_node = Node.new(value: parsed_output, schema: json_schema, path: path)
+        @scalar_rules = ScalarRules.new(@errors)
+        @object_rules = ObjectRules.new(@errors)
       end
       def validate
-        return [] unless @json_schema.is_a?(Hash)
-        return validate_non_hash_output unless @output.is_a?(Hash)
-        validate_object(@output, @json_schema, prefix: nil)
-        @errors
-      end
-      private
-      def validate_non_hash_output
-        expected_type = @json_schema[:type]&.to_s
-        if expected_type == "object" || @json_schema.key?(:properties)
-          return ["expected object, got #{@output.class}"]
-        end
-        errors = []
-        validate_type_match(errors, @output, expected_type, "root") if expected_type
-        validate_constraints(errors, @output, @json_schema, "root")
-        if expected_type == "array" && @output.is_a?(Array) && @json_schema[:items]
-          validate_array_items(errors, @output, @json_schema[:items], "")
-        end
-        errors
-      end
-      def validate_array_items(errors, array, items_schema, prefix)
-        array.each_with_index do |item, i|
-          item_prefix = "#{prefix}[#{i}]"
-          validate_value(errors, item, items_schema, item_prefix)
-        end
-      end
-      def validate_value(errors, value, schema, prefix)
-        value_type = schema[:type]&.to_s
-        validate_type_match(errors, value, value_type, prefix) if value_type
-        validate_constraints(errors, value, schema, prefix)
-        if value.is_a?(Hash) && (schema.key?(:properties) || value_type == "object")
-          validate_object(value, schema, prefix: prefix)
-          errors.concat(@errors)
-          @errors = []
-        elsif value.is_a?(Array) && schema[:items]
-          validate_array_items(errors, value, schema[:items], prefix)
-        end
-      end
-      def validate_type_match(errors, value, expected_type, prefix)
-        valid = case expected_type
-                when "string" then value.is_a?(String)
-                when "integer" then value.is_a?(Integer)
-                when "number" then value.is_a?(Numeric)
-                when "boolean" then value.is_a?(TrueClass) || value.is_a?(FalseClass)
-                when "array" then value.is_a?(Array)
-                else true
-                end
-        errors << "#{prefix}: expected #{expected_type}, got #{value.class}" unless valid
-      end
-      def validate_constraints(errors, value, schema, prefix)
-        if schema[:minimum] && value.is_a?(Numeric) && value < schema[:minimum]
-          errors << "#{prefix}: #{value} is less than minimum #{schema[:minimum]}"
-        end
-        if schema[:maximum] && value.is_a?(Numeric) && value > schema[:maximum]
-          errors << "#{prefix}: #{value} is greater than maximum #{schema[:maximum]}"
-        end
-        if schema[:enum] && !schema[:enum].include?(value)
-          errors << "#{prefix}: #{value.inspect} is not in enum #{schema[:enum].inspect}"
-        end
-        if schema[:minItems] && value.is_a?(Array) && value.length < schema[:minItems]
-          errors << "#{prefix}: array has #{value.length} items, minimum #{schema[:minItems]}"
-        end
-        if schema[:maxItems] && value.is_a?(Array) && value.length > schema[:maxItems]
-          errors << "#{prefix}: array has #{value.length} items, maximum #{schema[:maxItems]}"
-        end
-        if schema[:minLength] && value.is_a?(String) && value.length < schema[:minLength]
-          errors << "#{prefix}: string length #{value.length} is less than minLength #{schema[:minLength]}"
-        end
-        if schema[:maxLength] && value.is_a?(String) && value.length > schema[:maxLength]
-          errors << "#{prefix}: string length #{value.length} is greater than maxLength #{schema[:maxLength]}"
-        end
-      end
-      def extract_schema(schema)
-        instance = schema.is_a?(Class) ? schema.new : schema
-        json = if instance.respond_to?(:to_json_schema)
-                 schema_data = instance.to_json_schema
-                 schema_data[:schema] || schema_data["schema"] || schema_data
-               else
-                 schema
-               end
-        deep_symbolize(json)
-      end
-      def validate_object(output, schema, prefix:)
-        return unless output.is_a?(Hash) && schema.is_a?(Hash)
-        properties = schema[:properties] || {}
-        required = schema[:required] || []
-        check_required(required, output, prefix: prefix)
-        check_properties(properties, output, prefix: prefix, required_fields: required)
-        check_additional_properties(output, schema, prefix: prefix)
-      end
-      def check_required(required, output, prefix:)
-        required.each do |field|
-          key = field.to_s.to_sym
-          qualified = qualify(prefix, field)
-          @errors << "missing required field: #{qualified}" unless output.key?(key)
-        end
-      end
-      def check_properties(properties, output, prefix:, required_fields: [])
-        required_syms = required_fields.map { |field| field.to_s.to_sym }
-        properties.each do |field, constraints|
-          key = field.to_sym
-          value = output[key]
-          qualified = qualify(prefix, field)
-          if value.nil?
-            check_nil_required(qualified, key, constraints, required_syms, output)
-            next
-          end
+        return [] unless @root_node.schema.is_a?(Hash)
-          validate_field(FieldCheck.new(qualified: qualified, value: value, constraints: constraints))
-        end
-      end
-      def check_nil_required(qualified, key, constraints, required_syms, output)
-        return unless required_syms.include?(key) && output.key?(key)
-        expected = constraints[:type] || "non-null"
-        @errors << "#{qualified}: expected #{expected}, got nil"
-      end
-      def check_additional_properties(output, schema, prefix:)
-        return unless schema[:additionalProperties] == false
-        allowed_keys = (schema[:properties] || {}).keys.map { |prop_key| prop_key.to_s.to_sym }
-        extra_keys = output.keys - allowed_keys
-        extra_keys.each do |extra_key|
-          @errors << "#{qualify(prefix, extra_key)}: additional property not allowed"
-        end
-      end
-      def validate_field(field_check)
-        check_enum(field_check)
-        check_number_range(field_check)
-        check_type_constraint(field_check)
-        check_string_length(field_check)
-        check_nested(field_check)
-      end
-      def check_enum(field_check)
-        qualified, value, constraints = field_check.to_a
-        enum = constraints[:enum]
-        return unless enum
-        @errors << "#{qualified}: #{value.inspect} is not in enum #{enum.inspect}" unless enum.include?(value)
-      end
-      def check_number_range(field_check)
-        qualified, value, constraints = field_check.to_a
-        return unless value.is_a?(Numeric)
-        check_minimum(qualified, value, constraints[:minimum])
-        check_maximum(qualified, value, constraints[:maximum])
-      end
-      def check_type_constraint(field_check)
-        qualified, value, constraints = field_check.to_a
-        expected_type = constraints[:type]&.to_s
-        return unless expected_type
-        @errors << "#{qualified}: expected #{expected_type}, got #{value.class}" unless type_valid?(expected_type,
-                                                                                                    value)
-      end
-      def type_valid?(expected_type, value)
-        case expected_type
-        when "string" then value.is_a?(String)
-        when "number" then value.is_a?(Numeric)
-        when "integer" then value.is_a?(Integer)
-        when "boolean" then [true, false].include?(value)
-        when "array" then value.is_a?(Array)
-        when "object" then value.is_a?(Hash)
-        else true
-        end
-      end
-      def check_nested(field_check)
-        qualified, value, constraints = field_check.to_a
-        nested_type = constraints[:type]&.to_s
-        case nested_type
-        when "object"
-          validate_object(value, constraints, prefix: qualified) if value.is_a?(Hash)
-        when "array"
-          check_array_items(qualified, value, constraints) if value.is_a?(Array)
+        if @root_node.object_schema? && !@root_node.hash?
+          ["expected object, got #{@root_node.value.class}"]
+        else
+          validate_root
+          @errors
         end
       end
-      def check_string_length(field_check)
-        qualified, value, constraints = field_check.to_a
-        check_size_bounds(qualified, value.length, constraints, :string) if value.is_a?(String)
-      end
+      private
-      def check_array_length(qualified, value, constraints)
-        check_size_bounds(qualified, value.length, constraints, :array) if value.is_a?(Array)
+      def validate_root # Starts the recursive walk at the root node built in initialize.
+        validate_node(@root_node)
       end
-      def check_size_bounds(qualified, actual, constraints, kind)
-        bounds = SIZE_BOUNDS[kind]
-        check_size_minimum(qualified, actual, constraints[bounds[:min_key]], bounds)
-        check_size_maximum(qualified, actual, constraints[bounds[:max_key]], bounds)
+      def validate_node(node) # Validates one node: scalar rules always run, then object and/or array handling recurses.
+        @scalar_rules.validate(node)
+        @object_rules.validate(node) { |child| validate_node(child) } if node.hash? && node.object_schema? # recurse into each yielded property node
+        validate_array(node) if node.array? # array handling is keyed on the value's class, not the declared type
       end
-      def check_array_items(qualified, value, constraints)
-        check_array_length(qualified, value, constraints)
-        items_schema = constraints[:items]
+      def validate_array(node)
+        items_schema = node.items_schema
         return unless items_schema.is_a?(Hash)
-        value.each_with_index do |item, idx|
-          validate_array_item("#{qualified}[#{idx}]", item, items_schema)
+        node.value.each_with_index do |item, index|
+          validate_node(node.array_item(index, item, items_schema))
         end
       end
-      def validate_array_item(item_key, item, items_schema)
-        item_type = items_schema[:type]&.to_s
-        if item_type == "object" && item.is_a?(Hash)
-          validate_object(item, items_schema, prefix: item_key)
-        elsif item_type == "array" && item.is_a?(Array)
-          check_array_items(item_key, item, items_schema)
-        else
-          validate_field(FieldCheck.new(qualified: item_key, value: item, constraints: items_schema))
-        end
-      end
-      def check_minimum(qualified, actual, limit)
-        return unless limit && actual < limit
-        @errors << "#{qualified}: #{actual} is below minimum #{limit}"
-      end
-      def check_maximum(qualified, actual, limit)
-        return unless limit && actual > limit
-        @errors << "#{qualified}: #{actual} is above maximum #{limit}"
-      end
-      def check_size_minimum(qualified, actual, limit, bounds)
-        return unless limit && actual < limit
-        @errors << "#{qualified}: #{bounds[:metric]} #{actual} is below #{bounds[:min_key]} #{limit}"
-      end
-      def check_size_maximum(qualified, actual, limit, bounds)
-        return unless limit && actual > limit
-        @errors << "#{qualified}: #{bounds[:metric]} #{actual} is above #{bounds[:max_key]} #{limit}"
-      end
-      def qualify(prefix, field)
-        prefix ? "#{prefix}.#{field}" : field.to_s
+      def root_object_schema?(schema) # True when the root schema describes an object (type "object" or a :properties key).
+        # initialize calls this before validate's Hash guard can run, so a non-Hash schema must answer false, not raise.
+        schema.is_a?(Hash) && (schema[:type]&.to_s == "object" || schema.key?(:properties))
       end
     end
   end

data/lib/ruby_llm/contract/contract/validator.rb CHANGED Viewed

@@ -19,6 +19,15 @@ module RubyLLM
                      input: input, schema: schema)
       end
+      def self.run_observations(observers, parsed_output, input: nil) # Calls every observer and returns one result Hash per observer.
+        observers.map do |obs|
+          passed = obs.call(parsed_output, input: input)
+          { description: obs.description, passed: !!passed } # coerce the observer's return value to a strict boolean
+        rescue StandardError => e # a raising observer is recorded as failed (with the error text) instead of aborting the map
+          { description: obs.description, passed: false, error: "#{e.class}: #{e.message}" }
+        end
+      end
       private
       def parse_error?(parsed_output)

data/lib/ruby_llm/contract/eval/case_executor.rb ADDED Viewed

@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+module RubyLLM
+  module Contract
+    module Eval
+      class CaseExecutor # Runs one eval case: executes the step, normalizes and scores the result, builds a result, applies step expectations.
+        def initialize(step:, scorer: nil, normalizer: StepResultNormalizer.new,
+                       result_builder: CaseResultBuilder.new,
+                       step_expectation_applier: StepExpectationApplier.new) # collaborators are injectable; defaults are built when omitted
+          @step = step
+          @scorer = scorer || CaseScorer.new(step: step) # the default scorer is built around the same step
+          @normalizer = normalizer
+          @result_builder = result_builder
+          @step_expectation_applier = step_expectation_applier
+        end
+        def call(test_case:, context:) # Returns the applier's result, or a skipped CaseResult when the step reports a missing adapter.
+          run_result = @step.run(test_case.input, context: context)
+          step_result = @normalizer.call(run_result)
+          evaluation = @scorer.call(test_case: test_case, step_result: step_result)
+          result = @result_builder.call(test_case: test_case, step_result: step_result, evaluation: evaluation)
+          @step_expectation_applier.call(result: result, run_result: run_result, test_case: test_case) # receives the raw run_result, not the normalized one
+        rescue RubyLLM::Contract::Error => error
+          raise unless missing_adapter?(error) # only the missing-adapter error becomes a skip; any other Contract::Error propagates
+          skipped_result(test_case, error.message)
+        end
+        private
+        def missing_adapter?(error) # Detected by a substring match on the error message.
+          error.message.include?("No adapter configured")
+        end
+        def skipped_result(test_case, reason) # Non-passing CaseResult with step_status :skipped, score 0.0 and label "SKIP".
+          CaseResult.new(
+            name: test_case.name,
+            input: test_case.input,
+            output: nil,
+            expected: test_case.expected,
+            step_status: :skipped,
+            score: 0.0,
+            passed: false,
+            label: "SKIP",
+            details: "skipped: #{reason}"
+          )
+        end
+      end
+    end
+  end
+end

data/lib/ruby_llm/contract/eval/case_result_builder.rb ADDED Viewed

@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+module RubyLLM
+  module Contract
+    module Eval
+      class CaseResultBuilder # Assembles a CaseResult from the test case, the step result and its evaluation.
+        def call(test_case:, step_result:, evaluation:) # Copies case, step and evaluation fields, plus duration and cost taken from the trace.
+          trace = step_result.respond_to?(:trace) ? step_result.trace : nil # the trace is optional on the step result
+          CaseResult.new(
+            name: test_case.name,
+            input: test_case.input,
+            output: step_result.parsed_output,
+            expected: test_case.expected,
+            step_status: step_result.status,
+            score: evaluation.score,
+            passed: evaluation.passed,
+            label: evaluation.label,
+            details: evaluation.details,
+            duration_ms: trace_metric(trace, :total_latency_ms, :latency_ms),
+            cost: trace_metric(trace, :total_cost, :cost)
+          )
+        end
+        private
+        def trace_metric(trace, pipeline_key, step_key) # Uses the pipeline_key reader when the trace responds to it, else trace[step_key]; nil without a trace.
+          return nil unless trace
+          trace.respond_to?(pipeline_key) ? trace.public_send(pipeline_key) : trace[step_key] # presumably pipeline traces expose total_* readers; confirm
+        end
+      end
+    end
+  end
+end