ruby_llm-contract 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +55 -0
  4. data/CHANGELOG.md +76 -0
  5. data/Gemfile +11 -0
  6. data/Gemfile.lock +176 -0
  7. data/LICENSE +21 -0
  8. data/README.md +154 -0
  9. data/Rakefile +8 -0
  10. data/examples/00_basics.rb +500 -0
  11. data/examples/01_classify_threads.rb +220 -0
  12. data/examples/02_generate_comment.rb +203 -0
  13. data/examples/03_target_audience.rb +201 -0
  14. data/examples/04_real_llm.rb +410 -0
  15. data/examples/05_output_schema.rb +258 -0
  16. data/examples/07_keyword_extraction.rb +239 -0
  17. data/examples/08_translation.rb +353 -0
  18. data/examples/09_eval_dataset.rb +287 -0
  19. data/examples/10_reddit_full_showcase.rb +363 -0
  20. data/examples/README.md +140 -0
  21. data/lib/ruby_llm/contract/adapters/base.rb +13 -0
  22. data/lib/ruby_llm/contract/adapters/response.rb +17 -0
  23. data/lib/ruby_llm/contract/adapters/ruby_llm.rb +94 -0
  24. data/lib/ruby_llm/contract/adapters/test.rb +44 -0
  25. data/lib/ruby_llm/contract/adapters.rb +6 -0
  26. data/lib/ruby_llm/contract/concerns/deep_symbolize.rb +17 -0
  27. data/lib/ruby_llm/contract/concerns/eval_host.rb +109 -0
  28. data/lib/ruby_llm/contract/concerns/trace_equality.rb +15 -0
  29. data/lib/ruby_llm/contract/concerns/usage_aggregator.rb +43 -0
  30. data/lib/ruby_llm/contract/configuration.rb +21 -0
  31. data/lib/ruby_llm/contract/contract/definition.rb +39 -0
  32. data/lib/ruby_llm/contract/contract/invariant.rb +23 -0
  33. data/lib/ruby_llm/contract/contract/parser.rb +143 -0
  34. data/lib/ruby_llm/contract/contract/schema_validator.rb +239 -0
  35. data/lib/ruby_llm/contract/contract/validator.rb +104 -0
  36. data/lib/ruby_llm/contract/contract.rb +7 -0
  37. data/lib/ruby_llm/contract/cost_calculator.rb +38 -0
  38. data/lib/ruby_llm/contract/dsl.rb +13 -0
  39. data/lib/ruby_llm/contract/errors.rb +19 -0
  40. data/lib/ruby_llm/contract/eval/case_result.rb +76 -0
  41. data/lib/ruby_llm/contract/eval/contract_detail_builder.rb +47 -0
  42. data/lib/ruby_llm/contract/eval/dataset.rb +53 -0
  43. data/lib/ruby_llm/contract/eval/eval_definition.rb +112 -0
  44. data/lib/ruby_llm/contract/eval/evaluation_result.rb +27 -0
  45. data/lib/ruby_llm/contract/eval/evaluator/exact.rb +20 -0
  46. data/lib/ruby_llm/contract/eval/evaluator/json_includes.rb +58 -0
  47. data/lib/ruby_llm/contract/eval/evaluator/proc_evaluator.rb +40 -0
  48. data/lib/ruby_llm/contract/eval/evaluator/regex.rb +27 -0
  49. data/lib/ruby_llm/contract/eval/model_comparison.rb +80 -0
  50. data/lib/ruby_llm/contract/eval/pipeline_result_adapter.rb +15 -0
  51. data/lib/ruby_llm/contract/eval/report.rb +115 -0
  52. data/lib/ruby_llm/contract/eval/runner.rb +162 -0
  53. data/lib/ruby_llm/contract/eval/trait_evaluator.rb +75 -0
  54. data/lib/ruby_llm/contract/eval.rb +16 -0
  55. data/lib/ruby_llm/contract/pipeline/base.rb +62 -0
  56. data/lib/ruby_llm/contract/pipeline/result.rb +131 -0
  57. data/lib/ruby_llm/contract/pipeline/runner.rb +139 -0
  58. data/lib/ruby_llm/contract/pipeline/trace.rb +72 -0
  59. data/lib/ruby_llm/contract/pipeline.rb +6 -0
  60. data/lib/ruby_llm/contract/prompt/ast.rb +38 -0
  61. data/lib/ruby_llm/contract/prompt/builder.rb +47 -0
  62. data/lib/ruby_llm/contract/prompt/node.rb +25 -0
  63. data/lib/ruby_llm/contract/prompt/nodes/example_node.rb +27 -0
  64. data/lib/ruby_llm/contract/prompt/nodes/rule_node.rb +15 -0
  65. data/lib/ruby_llm/contract/prompt/nodes/section_node.rb +26 -0
  66. data/lib/ruby_llm/contract/prompt/nodes/system_node.rb +15 -0
  67. data/lib/ruby_llm/contract/prompt/nodes/user_node.rb +15 -0
  68. data/lib/ruby_llm/contract/prompt/nodes.rb +7 -0
  69. data/lib/ruby_llm/contract/prompt/renderer.rb +76 -0
  70. data/lib/ruby_llm/contract/railtie.rb +20 -0
  71. data/lib/ruby_llm/contract/rake_task.rb +78 -0
  72. data/lib/ruby_llm/contract/rspec/pass_eval.rb +96 -0
  73. data/lib/ruby_llm/contract/rspec/satisfy_contract.rb +31 -0
  74. data/lib/ruby_llm/contract/rspec.rb +6 -0
  75. data/lib/ruby_llm/contract/step/base.rb +138 -0
  76. data/lib/ruby_llm/contract/step/dsl.rb +144 -0
  77. data/lib/ruby_llm/contract/step/limit_checker.rb +64 -0
  78. data/lib/ruby_llm/contract/step/result.rb +38 -0
  79. data/lib/ruby_llm/contract/step/retry_executor.rb +90 -0
  80. data/lib/ruby_llm/contract/step/retry_policy.rb +76 -0
  81. data/lib/ruby_llm/contract/step/runner.rb +126 -0
  82. data/lib/ruby_llm/contract/step/trace.rb +70 -0
  83. data/lib/ruby_llm/contract/step.rb +10 -0
  84. data/lib/ruby_llm/contract/token_estimator.rb +19 -0
  85. data/lib/ruby_llm/contract/types.rb +11 -0
  86. data/lib/ruby_llm/contract/version.rb +7 -0
  87. data/lib/ruby_llm/contract.rb +108 -0
  88. data/ruby_llm-contract.gemspec +33 -0
  89. metadata +172 -0
@@ -0,0 +1,410 @@
1
+ # frozen_string_literal: true
2
+
3
+ # =============================================================================
4
+ # EXAMPLE 4: Real LLM calls via ruby_llm
5
+ #
6
+ # All previous examples used Adapters::Test with canned responses.
7
+ # This example shows how to connect to a real LLM provider
8
+ # (OpenAI, Anthropic, Google, etc.) using Adapters::RubyLLM.
9
+ #
10
+ # REQUIREMENTS:
11
+ # gem install ruby_llm
12
+ # export OPENAI_API_KEY=sk-... # or any provider key
13
+ #
14
+ # RUN:
15
+ # ruby examples/04_real_llm.rb
16
+ # =============================================================================
17
+
18
+ require_relative "../lib/ruby_llm/contract"
19
+
20
+ # =============================================================================
21
+ # STEP 1: Configure — single block, API key auto-creates adapter
22
+ #
23
+ # Just set your API key. The adapter is created automatically.
24
+ # =============================================================================
25
+
26
+ RubyLLM::Contract.configure do |config|
27
+ config.openai_api_key = ENV.fetch("OPENAI_API_KEY", nil)
28
+ # config.anthropic_api_key = ENV.fetch("ANTHROPIC_API_KEY", nil)
29
+ config.default_model = "gpt-4.1-mini"
30
+ end
31
+
32
+ # =============================================================================
33
+ # STEP 3: Define a step — identical to what you'd write with Test adapter
34
+ #
35
+ # The step doesn't know or care which adapter runs it.
36
+ # Same types, same prompt, same contract.
37
+ # =============================================================================
38
+
39
# Classifies a customer message into one of four support intents plus a
# confidence score. Provider-agnostic: the identical definition runs against
# the Test adapter in specs or a real LLM via Adapters::RubyLLM.
class ClassifyIntent < RubyLLM::Contract::Step::Base
  input_type String

  # Structured output contract; with a real provider this schema can also be
  # enforced server-side (structured output) — presumably, per the adapter
  # docs; confirm against Adapters::RubyLLM.
  output_schema do
    string :intent, enum: %w[sales support billing other]
    number :confidence, minimum: 0.0, maximum: 1.0
  end

  # Few-shot prompt: system instruction, one formatting rule, two
  # input/output examples, then the user input interpolated via {input}.
  prompt do
    system "You are an intent classifier for a customer support system."
    rule "Return JSON only, no markdown."
    example input: "I want to upgrade my plan",
            output: '{"intent": "sales", "confidence": 0.95}'
    example input: "My invoice is wrong",
            output: '{"intent": "billing", "confidence": 0.9}'
    user "{input}"
  end
end
57
+
58
+ # =============================================================================
59
+ # STEP 4: Run it — real LLM call, full contract enforcement
60
+ #
61
+ # The adapter sends the prompt to the real model.
62
+ # The contract validates the response just like with Test adapter.
63
+ # You get real token usage in the trace.
64
+ # =============================================================================
65
+
66
+ puts "Calling LLM..."
67
+ result = ClassifyIntent.run("I can't log in to my account")
68
+
69
+ puts "Status: #{result.status}" # => :ok
70
+ puts "Output: #{result.parsed_output}" # => {intent: "support", confidence: 0.95}
71
+ puts "Model: #{result.trace[:model]}" # => "gpt-4.1-mini"
72
+ puts "Latency: #{result.trace[:latency_ms]}ms" # => 823ms (real network time)
73
+ puts "Tokens: #{result.trace[:usage]}" # => {input_tokens: 142, output_tokens: 18}
74
+
75
+ if result.ok?
76
+ puts "Intent: #{result.parsed_output[:intent]}"
77
+ else
78
+ puts "FAILED: #{result.validation_errors}"
79
+ end
80
+
81
+ # =============================================================================
82
+ # STEP 5: Override model per call — A/B test different models
83
+ #
84
+ # Use context to try different models without changing the step definition.
85
+ # =============================================================================
86
+
87
+ puts "\n--- Comparing models ---"
88
+
89
+ %w[gpt-4.1-mini gpt-4.1-nano].each do |model|
90
+ r = ClassifyIntent.run("I need a refund", context: { model: model })
91
+ puts "#{model}: #{r.parsed_output} (#{r.trace[:latency_ms]}ms, #{r.trace[:usage]})"
92
+ end
93
+
94
+ # =============================================================================
95
+ # STEP 6: Control generation params — temperature, max_tokens
96
+ #
97
+ # Options are forwarded to ruby_llm. Lower temperature = more deterministic.
98
+ # =============================================================================
99
+
100
+ puts "\n--- With temperature 0 ---"
101
+ result = ClassifyIntent.run(
102
+ "Do you have an enterprise plan?",
103
+ context: { model: "gpt-4.1-mini", temperature: 0.0, max_tokens: 50 }
104
+ )
105
+ puts "Output: #{result.parsed_output}"
106
+
107
+ # =============================================================================
108
+ # STEP 7: Same step, different provider — just change the model
109
+ #
110
+ # If you have an Anthropic key configured, you can switch with one line.
111
+ # The prompt, contract, and invariants are provider-agnostic.
112
+ # =============================================================================
113
+
114
+ # Uncomment if you have an Anthropic key:
115
+ # puts "\n--- Anthropic ---"
116
+ # result = ClassifyIntent.run(
117
+ # "I want to cancel my subscription",
118
+ # context: { model: "claude-sonnet-4-6" }
119
+ # )
120
+ # puts "Output: #{result.parsed_output}"
121
+
122
+ # =============================================================================
123
+ # STEP 8: Error handling — what happens when things go wrong
124
+ #
125
+ # Contract enforcement works the same with real LLM responses.
126
+ # If the model returns something invalid, you get a clear error.
127
+ # =============================================================================
128
+
129
# Minimal sentiment classifier used to demonstrate error handling (STEP 8):
# if the model's reply violates the enum below, the run fails with a clear
# status and validation_errors instead of raising.
class StrictClassifier < RubyLLM::Contract::Step::Base
  input_type String

  output_schema do
    string :sentiment, enum: %w[positive negative neutral]
  end

  prompt do
    system "Classify the sentiment."
    user "{input}"
  end
end
141
+
142
+ puts "\n--- Strict classifier ---"
143
+ result = StrictClassifier.run("This product is amazing!", context: { model: "gpt-4.1-mini" })
144
+
145
+ if result.ok?
146
+ puts "Passed: #{result.parsed_output}"
147
+ else
148
+ puts "Failed: #{result.status} — #{result.validation_errors}"
149
+ puts "Raw: #{result.raw_output}"
150
+ end
151
+
152
+ # =============================================================================
153
+ # STEP 9: Full power — every prompt feature combined with a real LLM
154
+ #
155
+ # This step uses EVERY feature from 00_basics.rb in a single definition:
156
+ # - system message (main instruction)
157
+ # - rules (individual requirements)
158
+ # - sections (labeled context blocks)
159
+ # - examples (few-shot input/output pairs)
160
+ # - Hash input (multi-field auto-interpolation)
161
+ # - 1-arity invariants (validate output alone)
162
+ # - 2-arity invariants (cross-validate output against input)
163
+ #
164
+ # All of it running against a real LLM.
165
+ # =============================================================================
166
+
167
# Exercises every prompt feature against a real LLM in one step: system
# message, rules, labeled sections, few-shot examples, Hash input with
# auto-interpolation, plus 1-arity and 2-arity invariants.
class AnalyzeTicket < RubyLLM::Contract::Step::Base
  input_type RubyLLM::Contract::Types::Hash.schema(
    title: RubyLLM::Contract::Types::String,
    body: RubyLLM::Contract::Types::String,
    product: RubyLLM::Contract::Types::String,
    customer_tier: RubyLLM::Contract::Types::String
  )
  output_type Hash

  prompt do
    system "You are a support ticket analyzer for a SaaS company."

    rule "Return JSON only, no markdown, no explanation."
    rule "Include all required fields: category, priority, sentiment, summary, suggested_action."
    rule "Categories: billing, technical, feature_request, account, other."
    rule "Priorities: low, medium, high, urgent."
    rule "Sentiments: positive, negative, neutral, frustrated."
    rule "Summary must be one sentence, max 100 characters."

    section "PRODUCT CONTEXT", "Product: {product}\nCustomer tier: {customer_tier}"

    section "PRIORITY RULES",
            "urgent = data loss or security issue\n" \
            "high = service down or billing error\n" \
            "medium = feature broken but workaround exists\n" \
            "low = question, feedback, or cosmetic issue"

    example input: "Title: Can't export CSV\n\nBody: Export button returns 500 error since yesterday.",
            output: '{"category":"technical","priority":"high","sentiment":"frustrated",' \
                    '"summary":"CSV export returns 500 error","suggested_action":"escalate to engineering"}'

    example input: "Title: Dark mode request\n\nBody: Would love dark mode for late night work!",
            output: '{"category":"feature_request","priority":"low","sentiment":"positive",' \
                    '"summary":"Requests dark mode feature","suggested_action":"add to feature backlog"}'

    user "Title: {title}\n\nBody: {body}"
  end

  # --- 1-arity invariants: validate the output alone ---

  validate("category must be valid") do |out|
    %w[billing technical feature_request account other].include?(out[:category])
  end

  validate("priority must be valid") do |out|
    %w[low medium high urgent].include?(out[:priority])
  end

  validate("sentiment must be valid") do |out|
    %w[positive negative neutral frustrated].include?(out[:sentiment])
  end

  validate("summary must be present") do |out|
    !out[:summary].to_s.strip.empty?
  end

  validate("summary must be concise") do |out|
    out[:summary].to_s.length <= 100
  end

  validate("suggested_action must be present") do |out|
    !out[:suggested_action].to_s.strip.empty?
  end

  # --- 2-arity invariant: cross-validate the output against the input ---
  # An "urgent" classification is only accepted when the ticket body
  # actually mentions one of the severe-problem keywords.
  validate("urgent priority requires justification in body") do |out, inp|
    next true unless out[:priority] == "urgent"

    body = inp[:body].downcase
    ["data loss", "security", "breach", "leak", "deleted"].any? { |keyword| body.include?(keyword) }
  end
end
238
+
239
+ puts "\n--- Full power: AnalyzeTicket ---"
240
+
241
+ result = AnalyzeTicket.run(
242
+ {
243
+ title: "All my projects disappeared",
244
+ body: "I logged in this morning and all 47 projects are gone. This is a data loss emergency. " \
245
+ "I have a client demo in 2 hours.",
246
+ product: "ProjectHub Pro",
247
+ customer_tier: "enterprise"
248
+ },
249
+ context: { model: "gpt-4.1-mini", temperature: 0.0 }
250
+ )
251
+
252
+ puts "Status: #{result.status}"
253
+ puts "Category: #{result.parsed_output&.dig(:category)}"
254
+ puts "Priority: #{result.parsed_output&.dig(:priority)}"
255
+ puts "Sentiment: #{result.parsed_output&.dig(:sentiment)}"
256
+ puts "Summary: #{result.parsed_output&.dig(:summary)}"
257
+ puts "Action: #{result.parsed_output&.dig(:suggested_action)}"
258
+ puts "Latency: #{result.trace[:latency_ms]}ms"
259
+ puts "Tokens: #{result.trace[:usage]}"
260
+
261
+ puts "ERRORS: #{result.validation_errors}" if result.failed?
262
+
263
+ # =============================================================================
264
+ # STEP 10: Full power — Pipeline + output_schema + invariants + real LLM
265
+ #
266
+ # This combines everything: 3-step pipeline where each step has its own
267
+ # output_schema (provider-enforced), cross-validation invariants,
268
+ # and real LLM calls. If any step hallucinates, execution stops.
269
+ #
270
+ # Use case: meeting transcript → follow-up email
271
+ # Step 1 (listener): extract decisions + action items
272
+ # Step 2 (critic): flag vague owners/deadlines
273
+ # Step 3 (writer): generate send-ready follow-up email
274
+ # =============================================================================
275
+
276
# Pipeline step 1 ("listener"): pulls explicitly stated decisions and action
# items out of a raw meeting transcript. The nested schema objects give
# downstream steps a uniform shape to rely on.
class ExtractMeetingItems < RubyLLM::Contract::Step::Base
  input_type String

  output_schema do
    array :decisions do
      string :id
      string :description
      string :made_by
    end
    array :action_items do
      string :id
      string :task
      string :owner
      string :deadline
    end
  end

  prompt do
    system "Extract decisions and action items from a meeting transcript."
    rule "Only include decisions explicitly stated, never infer."
    rule "Assign sequential IDs: D1, D2... for decisions, A1, A2... for action items."
    user "{input}"
  end
end
300
+
301
# Pipeline step 2 ("critic"): passes decisions and action_items through
# unchanged and adds an analyses array flagging vague owners, missing
# deadlines, and unclear scope.
class AnalyzeAmbiguities < RubyLLM::Contract::Step::Base
  input_type Hash

  output_schema do
    array :decisions do
      string :id
      string :description
      string :made_by
    end
    array :action_items do
      string :id
      string :task
      string :owner
      string :deadline
    end
    array :analyses do
      string :action_item_id
      string :status, enum: %w[clear ambiguous]
      array :issues do
        string :field, enum: %w[owner deadline scope]
        string :problem
        string :clarification_question
      end
    end
  end

  prompt do
    system "Review action items for completeness. Flag vague owners, missing deadlines, unclear scope."
    rule "Pass through the original decisions and action_items unchanged."
    rule "Add an analyses array with one entry per action item."
    user "Decisions: {decisions}\n\nAction items: {action_items}"
  end

  # Cross-validation: the model must analyze exactly the action items that
  # step 1 produced — no extras, none skipped (order-insensitive).
  validate("all action items analyzed") do |out, inp|
    analyzed_ids = out[:analyses].map { |entry| entry[:action_item_id] }
    expected_ids = inp[:action_items].map { |item| item[:id] }
    analyzed_ids.sort == expected_ids.sort
  end
end
340
+
341
# Pipeline step 3 ("writer"): turns the analyzed meeting data into a
# send-ready follow-up email (subject + body).
class GenerateMeetingEmail < RubyLLM::Contract::Step::Base
  input_type Hash

  output_schema do
    string :subject
    string :body
  end

  prompt do
    system "Write a professional follow-up email. List decisions, clear action items " \
           "with owners and deadlines, and embed clarification questions for ambiguous items."
    user "Decisions: {decisions}\nAction items: {action_items}\nAnalyses: {analyses}"
  end

  # Business-logic invariants on top of the structural schema above.
  validate("subject must be concise") do |out|
    out[:subject].length <= 80
  end

  validate("body must not be empty") do |out|
    !out[:body].to_s.strip.empty?
  end
end
358
+
359
# Three-step pipeline: extract -> analyze -> email. Each step's validated
# output feeds the next; a contract failure at any step stops execution
# (see the failed_step branch below).
class MeetingFollowUpPipeline < RubyLLM::Contract::Pipeline::Base
  step ExtractMeetingItems, as: :extract
  step AnalyzeAmbiguities, as: :analyze
  step GenerateMeetingEmail, as: :email
end
364
+
365
+ transcript = <<~TRANSCRIPT
366
+ Sarah: Let's go with the new pricing model starting Q3.
367
+ Tom: I'll update the billing system... at some point.
368
+ Sarah: Someone should notify the sales team about the changes.
369
+ Tom: Also, we need to migrate the legacy accounts. Maria, can you handle that?
370
+ Maria: Sure, I'll look into it.
371
+ TRANSCRIPT
372
+
373
+ puts "\n--- Full power: Pipeline + Schema + Invariants + Real LLM ---"
374
+ result = MeetingFollowUpPipeline.run(transcript,
375
+ context: { model: "gpt-4.1-mini", temperature: 0.0 })
376
+
377
+ puts "Pipeline status: #{result.status}"
378
+ puts "Steps run: #{result.step_results.length}"
379
+
380
+ if result.ok?
381
+ puts "\nExtracted: #{result.outputs_by_step[:extract][:decisions]&.length} decisions, " \
382
+ "#{result.outputs_by_step[:extract][:action_items]&.length} action items"
383
+
384
+ ambiguous = result.outputs_by_step[:analyze][:analyses]&.select { |a| a[:status] == "ambiguous" }
385
+ puts "Ambiguous: #{ambiguous&.length} items need clarification"
386
+ puts "Email subj: #{result.outputs_by_step[:email][:subject]}"
387
+ else
388
+ puts "FAILED at: #{result.failed_step}"
389
+ failed = result.step_results.last[:result]
390
+ puts "Errors: #{failed.validation_errors}"
391
+ end
392
+
393
+ # =============================================================================
394
+ # SUMMARY
395
+ #
396
+ # 1. Configure ruby_llm (API keys)
397
+ # 2. Adapter (Adapters::RubyLLM) is created automatically from the API key
398
+ # 3. Define steps exactly as before (types, prompt, contract)
399
+ # 4. Run — real LLM call with full contract enforcement
400
+ # 5. Override model/temperature/max_tokens per call via context
401
+ # 6. Switch providers by changing the model name — everything else stays
402
+ # 7. Combine ALL features in a single step: system, rules, sections,
403
+ # examples, hash input, 1-arity + 2-arity invariants
404
+ # 8. Error handling — contract enforcement with real LLM responses
405
+ # 9. Full power single step — AnalyzeTicket with every feature
406
+ # 10. Full power pipeline — 3 steps, schemas, invariants, real LLM
407
+ #
408
+ # The step definition is always provider-agnostic.
409
+ # Swap adapters between Test (specs) and RubyLLM (production).
410
+ # =============================================================================
@@ -0,0 +1,258 @@
1
+ # frozen_string_literal: true
2
+
3
+ # =============================================================================
4
+ # EXAMPLE 5: Declarative output schema (ruby_llm-schema)
5
+ #
6
+ # Replace manual invariants with a schema DSL.
7
+ # The schema is sent to the LLM provider for structured output enforcement.
8
+ #
9
+ # With Test adapter: schema defines expectations, parsing is auto-inferred.
10
+ # With RubyLLM adapter: schema is also enforced server-side by the provider.
11
+ # =============================================================================
12
+
13
+ require_relative "../lib/ruby_llm/contract"
14
+
15
+ # =============================================================================
16
+ # STEP 1: BEFORE — legacy approach with output_type + manual invariants
17
+ #
18
+ # Every enum, range, and required field is a separate invariant.
19
+ # Works, but verbose. This is what you'd write WITHOUT output_schema.
20
+ # =============================================================================
21
+
22
+ RubyLLM::Contract.configure do |c|
23
+ c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
24
+ response: '{"intent": "sales", "confidence": 0.95}'
25
+ )
26
+ end
27
+
28
# "Before" variant: output structure is enforced by hand — output_type Hash
# plus one invariant per constraint. Deliberately verbose; compare with the
# output_schema version defined next.
class ClassifyIntentBefore < RubyLLM::Contract::Step::Base
  input_type String
  output_type Hash

  prompt do
    system "Classify the user's intent."
    rule "Return JSON only."
    user "{input}"
  end

  validate("must include intent") do |out|
    !out[:intent].to_s.empty?
  end

  validate("intent must be allowed") do |out|
    %w[sales support billing].include?(out[:intent])
  end

  validate("confidence must be a number") do |out|
    out[:confidence].is_a?(Numeric)
  end

  validate("confidence in range") do |out|
    out[:confidence]&.between?(0.0, 1.0)
  end
end
43
+
44
+ result = ClassifyIntentBefore.run("I want to buy")
45
+ result.status # => :ok
46
+ result.parsed_output # => {intent: "sales", confidence: 0.95}
47
+
48
+ # =============================================================================
49
+ # STEP 2: AFTER — output_schema replaces structural invariants
50
+ #
51
+ # Same constraints, but declared as a schema.
52
+ # No `output_type`, no `parse :json`, no structural invariants.
53
+ # =============================================================================
54
+
55
# "After" variant: the schema below replaces output_type, JSON parsing, and
# all four structural invariants from the preceding class — same constraints,
# declared once.
class ClassifyIntentAfter < RubyLLM::Contract::Step::Base
  input_type String

  output_schema do
    string :intent, enum: %w[sales support billing]
    number :confidence, minimum: 0.0, maximum: 1.0
  end

  prompt do
    system "Classify the user's intent."
    rule "Return JSON only."
    user "{input}"
  end
end
69
+
70
+ result = ClassifyIntentAfter.run("I want to buy")
71
+ result.status # => :ok
72
+ result.parsed_output # => {intent: "sales", confidence: 0.95}
73
+
74
+ # =============================================================================
75
+ # STEP 3: Schema + invariants — best of both worlds
76
+ #
77
+ # Schema handles structure (types, enums, ranges).
78
+ # Invariants handle business logic (cross-validation, conditionals).
79
+ # =============================================================================
80
+
81
+ RubyLLM::Contract.configure do |c|
82
+ c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
83
+ response: '{"category": "account", "priority": "urgent", "summary": "Projects disappeared"}'
84
+ )
85
+ end
86
+
87
# Schema + invariants combined: the schema enforces structure (enums,
# required fields), while the invariant below covers business logic that
# needs both the output and the original input.
class AnalyzeTicket < RubyLLM::Contract::Step::Base
  input_type RubyLLM::Contract::Types::Hash.schema(
    title: RubyLLM::Contract::Types::String,
    body: RubyLLM::Contract::Types::String
  )

  output_schema do
    string :category, enum: %w[billing technical feature_request account other]
    string :priority, enum: %w[low medium high urgent]
    string :summary, description: "One-sentence summary"
  end

  prompt do
    system "Analyze support tickets."
    rule "Return JSON with category, priority, and summary."
    user "Title: {title}\n\nBody: {body}"
  end

  # 2-arity cross-validation: "urgent" is only accepted when the ticket
  # body actually mentions one of the severe-problem keywords.
  validate("urgent requires justification") do |out, inp|
    next true unless out[:priority] == "urgent"

    body = inp[:body].downcase
    ["data loss", "security", "deleted"].any? { |keyword| body.include?(keyword) }
  end
end
114
+
115
+ # Justified urgent:
116
+ result = AnalyzeTicket.run({
117
+ title: "Projects disappeared",
118
+ body: "All my projects are gone. This is a data loss emergency."
119
+ })
120
+ result.status # => :ok
121
+
122
+ # Unjustified urgent:
123
+ RubyLLM::Contract.configure do |c|
124
+ c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
125
+ response: '{"category": "technical", "priority": "urgent", "summary": "Page is slow"}'
126
+ )
127
+ end
128
+
129
+ result = AnalyzeTicket.run({
130
+ title: "Slow page",
131
+ body: "Dashboard takes 5 seconds to load."
132
+ })
133
+ result.status # => :validation_failed
134
+ result.validation_errors # => ["urgent requires justification"]
135
+
136
+ # =============================================================================
137
+ # STEP 4: Complex schema — nested objects, arrays, optional fields
138
+ # =============================================================================
139
+
140
+ RubyLLM::Contract.configure do |c|
141
+ c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
142
+ response: '{"locale": "en", "groups": [{"who": "Freelancers", "pain_points": ["invoicing", "time tracking"]}]}'
143
+ )
144
+ end
145
+
146
# Demonstrates a complex nested schema: a top-level string plus an array of
# objects, each containing its own string array, with min/max item
# constraints on both levels.
class ProfileAudience < RubyLLM::Contract::Step::Base
  input_type RubyLLM::Contract::Types::Hash.schema(
    product: RubyLLM::Contract::Types::String,
    market: RubyLLM::Contract::Types::String
  )

  output_schema do
    string :locale, description: "ISO 639-1 language code"
    array :groups, min_items: 1, max_items: 4 do
      string :who, description: "Audience segment name"
      array :pain_points, of: :string, min_items: 1
    end
  end

  prompt do
    system "Generate target audience profiles."
    user "Product: {product}, Market: {market}"
  end
end
165
+
166
+ result = ProfileAudience.run({ product: "InvoiceApp", market: "US freelancers" })
167
+ result.status # => :ok
168
+ result.parsed_output # => {locale: "en", groups: [{who: "Freelancers", pain_points: [...]}]}
169
+
170
+ # =============================================================================
171
+ # STEP 5: Schema is provider-agnostic
172
+ #
173
+ # With Test adapter: schema auto-infers JSON parsing, no provider enforcement.
174
+ # With RubyLLM adapter: schema is ALSO sent to provider (structured output).
175
+ # The step definition doesn't change.
176
+ # =============================================================================
177
+
178
+ # To use with a real LLM and get provider-side enforcement:
179
+ #
180
+ # RubyLLM.configure { |c| c.openai_api_key = ENV["OPENAI_API_KEY"] }
181
+ # adapter = RubyLLM::Contract::Adapters::RubyLLM.new
182
+ # result = ClassifyIntentAfter.run("I want to buy",
183
+ # context: { adapter: adapter, model: "gpt-4.1-mini" })
184
+ #
185
+ # The provider enforces the schema — the model MUST return valid JSON
186
+ # matching the schema. Parse errors become nearly impossible.
187
+
188
+ # =============================================================================
189
+ # STEP 6: Pipeline with schemas — each step has its own schema
190
+ #
191
+ # Pipeline + output_schema = fully typed, provider-enforced multi-step flow.
192
+ # Each step declares its output schema. Data threads automatically.
193
+ # =============================================================================
194
+
195
+ RubyLLM::Contract.configure do |c|
196
+ c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
197
+ response: '{"category": "billing", "priority": "high", "summary": "Payment page broken"}'
198
+ )
199
+ end
200
+
201
# Pipeline step 1: triages a raw ticket into category/priority/summary,
# each constrained by the schema below.
class TriageTicket < RubyLLM::Contract::Step::Base
  input_type RubyLLM::Contract::Types::Hash.schema(title: RubyLLM::Contract::Types::String, body: RubyLLM::Contract::Types::String)

  output_schema do
    string :category, enum: %w[billing technical feature_request account other]
    string :priority, enum: %w[low medium high urgent]
    string :summary
  end

  prompt do
    system "Triage support ticket."
    user "Title: {title}\nBody: {body}"
  end
end
215
+
216
# Pipeline step 2: consumes the triaged fields (interpolated from the
# previous step's output hash) and suggests a routing action.
class SuggestAction < RubyLLM::Contract::Step::Base
  input_type Hash

  output_schema do
    string :action
    string :team, enum: %w[engineering support billing product]
    boolean :escalate
  end

  prompt do
    system "Suggest action for a triaged ticket."
    user "Category: {category}, Priority: {priority}, Summary: {summary}"
  end
end
230
+
231
# Two-step typed pipeline: TriageTicket's schema-validated output becomes
# SuggestAction's input automatically.
class TicketPipeline < RubyLLM::Contract::Pipeline::Base
  step TriageTicket, as: :triage
  step SuggestAction, as: :action
end
235
+
236
+ # Both steps share the test adapter, so they get the same canned response.
237
+ # With a real LLM, step 2 would get a different response based on step 1's output.
238
+ result = TicketPipeline.run(
239
+ { title: "Payment page broken", body: "Error 500 on checkout" }
240
+ )
241
+ result.ok? # => true
242
+ result.outputs_by_step[:triage] # => {category: "billing", priority: "high", summary: "..."}
243
+ result.outputs_by_step[:action] # => same canned response (test adapter)
244
+ result.step_results.length # => 2
245
+
246
+ # =============================================================================
247
+ # SUMMARY
248
+ #
249
+ # Step 1: BEFORE — output_type + parse :json + structural invariants
250
+ # Step 2: AFTER — output_schema replaces all of that
251
+ # Step 3: Schema + invariants — schema for structure, invariants for logic
252
+ # Step 4: Complex schemas — nested objects, arrays, constraints
253
+ # Step 5: Provider-agnostic — same schema works with Test and RubyLLM
254
+ # Step 6: Pipeline + schemas — fully typed multi-step composition
255
+ #
256
+ # output_schema is optional. Existing steps with output_type + invariants
257
+ # continue to work unchanged.
258
+ # =============================================================================