RubyGems - ruby_llm-contract - Versions diffs - 0.2.0 - Mend

ruby_llm-contract 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (89) hide show

checksums.yaml +7 -0
data/.rspec +3 -0
data/.rubocop.yml +55 -0
data/CHANGELOG.md +76 -0
data/Gemfile +11 -0
data/Gemfile.lock +176 -0
data/LICENSE +21 -0
data/README.md +154 -0
data/Rakefile +8 -0
data/examples/00_basics.rb +500 -0
data/examples/01_classify_threads.rb +220 -0
data/examples/02_generate_comment.rb +203 -0
data/examples/03_target_audience.rb +201 -0
data/examples/04_real_llm.rb +410 -0
data/examples/05_output_schema.rb +258 -0
data/examples/07_keyword_extraction.rb +239 -0
data/examples/08_translation.rb +353 -0
data/examples/09_eval_dataset.rb +287 -0
data/examples/10_reddit_full_showcase.rb +363 -0
data/examples/README.md +140 -0
data/lib/ruby_llm/contract/adapters/base.rb +13 -0
data/lib/ruby_llm/contract/adapters/response.rb +17 -0
data/lib/ruby_llm/contract/adapters/ruby_llm.rb +94 -0
data/lib/ruby_llm/contract/adapters/test.rb +44 -0
data/lib/ruby_llm/contract/adapters.rb +6 -0
data/lib/ruby_llm/contract/concerns/deep_symbolize.rb +17 -0
data/lib/ruby_llm/contract/concerns/eval_host.rb +109 -0
data/lib/ruby_llm/contract/concerns/trace_equality.rb +15 -0
data/lib/ruby_llm/contract/concerns/usage_aggregator.rb +43 -0
data/lib/ruby_llm/contract/configuration.rb +21 -0
data/lib/ruby_llm/contract/contract/definition.rb +39 -0
data/lib/ruby_llm/contract/contract/invariant.rb +23 -0
data/lib/ruby_llm/contract/contract/parser.rb +143 -0
data/lib/ruby_llm/contract/contract/schema_validator.rb +239 -0
data/lib/ruby_llm/contract/contract/validator.rb +104 -0
data/lib/ruby_llm/contract/contract.rb +7 -0
data/lib/ruby_llm/contract/cost_calculator.rb +38 -0
data/lib/ruby_llm/contract/dsl.rb +13 -0
data/lib/ruby_llm/contract/errors.rb +19 -0
data/lib/ruby_llm/contract/eval/case_result.rb +76 -0
data/lib/ruby_llm/contract/eval/contract_detail_builder.rb +47 -0
data/lib/ruby_llm/contract/eval/dataset.rb +53 -0
data/lib/ruby_llm/contract/eval/eval_definition.rb +112 -0
data/lib/ruby_llm/contract/eval/evaluation_result.rb +27 -0
data/lib/ruby_llm/contract/eval/evaluator/exact.rb +20 -0
data/lib/ruby_llm/contract/eval/evaluator/json_includes.rb +58 -0
data/lib/ruby_llm/contract/eval/evaluator/proc_evaluator.rb +40 -0
data/lib/ruby_llm/contract/eval/evaluator/regex.rb +27 -0
data/lib/ruby_llm/contract/eval/model_comparison.rb +80 -0
data/lib/ruby_llm/contract/eval/pipeline_result_adapter.rb +15 -0
data/lib/ruby_llm/contract/eval/report.rb +115 -0
data/lib/ruby_llm/contract/eval/runner.rb +162 -0
data/lib/ruby_llm/contract/eval/trait_evaluator.rb +75 -0
data/lib/ruby_llm/contract/eval.rb +16 -0
data/lib/ruby_llm/contract/pipeline/base.rb +62 -0
data/lib/ruby_llm/contract/pipeline/result.rb +131 -0
data/lib/ruby_llm/contract/pipeline/runner.rb +139 -0
data/lib/ruby_llm/contract/pipeline/trace.rb +72 -0
data/lib/ruby_llm/contract/pipeline.rb +6 -0
data/lib/ruby_llm/contract/prompt/ast.rb +38 -0
data/lib/ruby_llm/contract/prompt/builder.rb +47 -0
data/lib/ruby_llm/contract/prompt/node.rb +25 -0
data/lib/ruby_llm/contract/prompt/nodes/example_node.rb +27 -0
data/lib/ruby_llm/contract/prompt/nodes/rule_node.rb +15 -0
data/lib/ruby_llm/contract/prompt/nodes/section_node.rb +26 -0
data/lib/ruby_llm/contract/prompt/nodes/system_node.rb +15 -0
data/lib/ruby_llm/contract/prompt/nodes/user_node.rb +15 -0
data/lib/ruby_llm/contract/prompt/nodes.rb +7 -0
data/lib/ruby_llm/contract/prompt/renderer.rb +76 -0
data/lib/ruby_llm/contract/railtie.rb +20 -0
data/lib/ruby_llm/contract/rake_task.rb +78 -0
data/lib/ruby_llm/contract/rspec/pass_eval.rb +96 -0
data/lib/ruby_llm/contract/rspec/satisfy_contract.rb +31 -0
data/lib/ruby_llm/contract/rspec.rb +6 -0
data/lib/ruby_llm/contract/step/base.rb +138 -0
data/lib/ruby_llm/contract/step/dsl.rb +144 -0
data/lib/ruby_llm/contract/step/limit_checker.rb +64 -0
data/lib/ruby_llm/contract/step/result.rb +38 -0
data/lib/ruby_llm/contract/step/retry_executor.rb +90 -0
data/lib/ruby_llm/contract/step/retry_policy.rb +76 -0
data/lib/ruby_llm/contract/step/runner.rb +126 -0
data/lib/ruby_llm/contract/step/trace.rb +70 -0
data/lib/ruby_llm/contract/step.rb +10 -0
data/lib/ruby_llm/contract/token_estimator.rb +19 -0
data/lib/ruby_llm/contract/types.rb +11 -0
data/lib/ruby_llm/contract/version.rb +7 -0
data/lib/ruby_llm/contract.rb +108 -0
data/ruby_llm-contract.gemspec +33 -0
metadata +172 -0

data/examples/01_classify_threads.rb ADDED Viewed

@@ -0,0 +1,220 @@
+# frozen_string_literal: true
+# =============================================================================
+# EXAMPLE 1: Thread Classification (PROMO / FILLER / SKIP)
+#
+# Real-world case: A Reddit promotion planner needs to classify threads
+# into PROMO (worth commenting with a product link), FILLER (worth a
+# genuine comment without product mention), or SKIP (irrelevant).
+# =============================================================================
+require_relative "../lib/ruby_llm/contract"
+# =============================================================================
+# BEFORE: Legacy approach (inline heredoc + ad-hoc validation)
+# =============================================================================
+#
+# In the legacy codebase, this lives across multiple concern files:
+# - classification_prompts.rb (prompt building)
+# - thread_classification.rb (LLM calling + parsing)
+# - llm_result_mapper.rb (ID matching with positional fallback)
+#
+# ```ruby
+# # classification_prompts.rb
+# def build_classify_prompt(items)
+#   <<~PROMPT
+#     #{classify_product_header}
+#     #{classify_sitemap_section}
+#     Classify each Reddit thread below for this product's promotion campaign.
+#
+#     For each thread, decide:
+#     #{classify_decision_rules}
+#
+#     IMPORTANT: Be careful with PROMO. Follow these rules:
+#     #{classify_promo_caution_rules}
+#
+#     Also provide:
+#     #{classify_output_fields}
+#
+#     Threads:
+#     #{items.to_json}
+#   PROMPT
+# end
+#
+# # thread_classification.rb
+# def classify_batch_via_llm(batch)
+#   items = build_classify_items(batch)
+#   prompt = build_classify_prompt(items)
+#   response = ai_call(prompt, schema: classify_response_schema)
+#   parsed = parse_llm_json(response)
+#   # Manual ID matching with positional fallback (masks errors!)
+#   map_llm_results_by_id(items, parsed["threads"])
+# end
+# ```
+#
+# PROBLEMS:
+# - Prompt is a string concatenation of 6 helper methods
+# - No contract on output — if model returns wrong enum, it silently propagates
+# - ID matching has a positional fallback that masks when model rewrites IDs
+# - No way to test prompt quality without hitting the API
+# - Change one line in classify_promo_caution_rules → no idea what broke
+# =============================================================================
+# AFTER: ruby_llm-contract approach
+# =============================================================================
+class ClassifyThreads < RubyLLM::Contract::Step::Base
+  input_type  RubyLLM::Contract::Types::Array.of(RubyLLM::Contract::Types::Hash)
+  output_type RubyLLM::Contract::Types::Array.of(RubyLLM::Contract::Types::Hash)
+  prompt do
+    system "You classify Reddit threads for a product promotion campaign."
+    rule "For each thread, classify as PROMO, FILLER, or SKIP."
+    rule "PROMO: thread author has a problem where the product helps naturally."
+    rule "FILLER: related to domain, good for a genuine comment without product mention."
+    rule "SKIP: irrelevant, low engagement, hostile to recommendations, grief/politics."
+    rule "Return a JSON array with id, classification, relevance_score (0-10), and thread_intent."
+    rule "thread_intent must be one of: seeking_help, sharing, discussion, venting."
+    section "SCORING GUIDE", <<~GUIDE
+      8-10: Clear problem/situation the product solves
+      5-7: Author is in target audience, link would fit naturally
+      2-4: Same broad domain but weak connection
+      0-1: Irrelevant
+    GUIDE
+    user "{input}"
+  end
+  # Structural: every input ID must appear in output
+  validate("all thread IDs must match input") do |output, input|
+    output.map { |r| r[:id] }.sort == input.map { |t| t[:id] }.sort
+  end
+  # Enum: classification must be valid
+  validate("classification must be PROMO, FILLER, or SKIP") do |output|
+    output.all? { |r| %w[PROMO FILLER SKIP].include?(r[:classification]) }
+  end
+  # Consistency: PROMO threads must have decent relevance
+  validate("PROMO threads must have relevance_score >= 5") do |output|
+    output.select { |r| r[:classification] == "PROMO" }
+          .all? { |r| r[:relevance_score].is_a?(Integer) && r[:relevance_score] >= 5 }
+  end
+  # Enum: thread_intent must be valid
+  validate("thread_intent must be valid") do |output|
+    valid = %w[seeking_help sharing discussion venting]
+    output.all? { |r| valid.include?(r[:thread_intent]) }
+  end
+end
+# =============================================================================
+# AFTER + SCHEMA: output_schema replaces structural invariants
+#
+# Compare with the version above:
+# - classification enum → schema
+# - thread_intent enum → schema
+# - relevance_score type/range → schema
+# - ID matching → still an invariant (cross-validation with input)
+# - PROMO score check → still an invariant (conditional logic)
+# =============================================================================
+class ClassifyThreadsWithSchema < RubyLLM::Contract::Step::Base
+  input_type RubyLLM::Contract::Types::Array.of(RubyLLM::Contract::Types::Hash)
+  output_schema do
+    array :threads do
+      string :id
+      string :classification, enum: %w[PROMO FILLER SKIP]
+      integer :relevance_score, minimum: 0, maximum: 10
+      string :thread_intent, enum: %w[seeking_help sharing discussion venting]
+    end
+  end
+  prompt do
+    system "You classify Reddit threads for a product promotion campaign."
+    rule "For each thread, classify as PROMO, FILLER, or SKIP."
+    rule "PROMO: thread author has a problem where the product helps naturally."
+    rule "FILLER: related to domain, good for a genuine comment without product mention."
+    rule "SKIP: irrelevant, low engagement, hostile to recommendations, grief/politics."
+    rule "Return JSON with a threads array. Each entry: id, classification, relevance_score (0-10), thread_intent."
+    rule "thread_intent must be one of: seeking_help, sharing, discussion, venting."
+    section "SCORING GUIDE", <<~GUIDE
+      8-10: Clear problem/situation the product solves
+      5-7: Author is in target audience, link would fit naturally
+      2-4: Same broad domain but weak connection
+      0-1: Irrelevant
+    GUIDE
+    user "{input}"
+  end
+  # Only custom business logic — structural constraints are in the schema
+  validate("all thread IDs must match input") do |output, input|
+    output[:threads].map { |r| r[:id] }.sort == input.map { |t| t[:id] }.sort
+  end
+  validate("PROMO threads must have relevance_score >= 5") do |output|
+    output[:threads].select { |r| r[:classification] == "PROMO" }
+                    .all? { |r| r[:relevance_score] >= 5 }
+  end
+end
+# =============================================================================
+# DEMO: Run with test adapter
+# =============================================================================
+# Three sample threads; this same array is the input for every run below.
+sample_threads = [
+  { id: "t1", subreddit: "crochet", title: "spent way too much on yarn this month lol", selftext: "anyone else?" },
+  { id: "t2", subreddit: "gaming", title: "my cat destroyed my controller", selftext: "RIP" },
+  { id: "t3", subreddit: "deals", title: "best craft supply deals?", selftext: "looking for yarn and fabric sales" }
+]
+# Happy path — valid response
+# The extra :matched_page key is not read by any ClassifyThreads validator.
+valid_response = [
+  { id: "t1", classification: "PROMO", relevance_score: 7, thread_intent: "venting", matched_page: "/yarn-deals" },
+  { id: "t2", classification: "SKIP", relevance_score: 1, thread_intent: "venting", matched_page: "" },
+  { id: "t3", classification: "PROMO", relevance_score: 9, thread_intent: "seeking_help", matched_page: "/craft-deals" }
+].to_json
+# Test adapter built from a canned JSON string (presumably returned as the
+# model response instead of calling a real LLM — confirm in Adapters::Test).
+adapter = RubyLLM::Contract::Adapters::Test.new(response: valid_response)
+result = ClassifyThreads.run(sample_threads, context: { adapter: adapter, model: "gpt-5-mini" })
+puts "=== HAPPY PATH ==="
+puts "Status: #{result.status}"
+puts "Parsed output: #{result.parsed_output.map { |r| "#{r[:id]}=#{r[:classification]}" }.join(", ")}"
+puts "Validation errors: #{result.validation_errors}"
+puts
+# Bad path — model returns wrong enum
+# "MAYBE" on t1 is outside the PROMO/FILLER/SKIP list checked by ClassifyThreads.
+bad_response = [
+  { id: "t1", classification: "MAYBE", relevance_score: 7, thread_intent: "venting" },
+  { id: "t2", classification: "SKIP", relevance_score: 1, thread_intent: "venting" },
+  { id: "t3", classification: "PROMO", relevance_score: 9, thread_intent: "seeking_help" }
+].to_json
+bad_adapter = RubyLLM::Contract::Adapters::Test.new(response: bad_response)
+result = ClassifyThreads.run(sample_threads, context: { adapter: bad_adapter })
+puts "=== BAD ENUM ==="
+puts "Status: #{result.status}"
+puts "Validation errors: #{result.validation_errors}"
+puts
+# Bad path — model rewrites IDs (the silent bug legacy code masked with positional fallback)
+# The ids below ("thread_1"...) differ from the input ids ("t1"...), so the
+# ID-matching validator compares two different sets.
+rewritten_ids_response = [
+  { id: "thread_1", classification: "PROMO", relevance_score: 7, thread_intent: "venting" },
+  { id: "thread_2", classification: "SKIP", relevance_score: 1, thread_intent: "venting" },
+  { id: "thread_3", classification: "PROMO", relevance_score: 9, thread_intent: "seeking_help" }
+].to_json
+rewritten_adapter = RubyLLM::Contract::Adapters::Test.new(response: rewritten_ids_response)
+result = ClassifyThreads.run(sample_threads, context: { adapter: rewritten_adapter })
+puts "=== REWRITTEN IDs (legacy code would silently fallback to positional matching) ==="
+puts "Status: #{result.status}"
+puts "Validation errors: #{result.validation_errors}"

data/examples/02_generate_comment.rb ADDED Viewed

@@ -0,0 +1,203 @@
+# frozen_string_literal: true
+# =============================================================================
+# EXAMPLE 2: Promo Comment Generation
+#
+# Real-world case: Generate a Reddit comment that subtly promotes a product.
+# The comment must match the thread's language, sound like a real user,
+# include a product link naturally, and follow strict persona rules.
+# =============================================================================
+require_relative "../lib/ruby_llm/contract"
+# =============================================================================
+# BEFORE: Legacy approach (200+ lines across multiple concerns)
+# =============================================================================
+#
+# In the legacy codebase, the prompt is assembled from 6+ helper methods
+# across comment_prompts.rb (240 lines):
+#
+# ```ruby
+# # System message built from 8 sections:
+# def system_message_for_promo
+#   base_system_message(
+#     intro: "You write Reddit comments that subtly promote a product...",
+#     voice_lines: [
+#       "Sound like a genuine user who found something useful, not an ad.",
+#       'Never say "I built" or "I made this".',
+#       "Sound like a real Reddit user: casual, no marketing speak...",
+#       # ... 10 more rules
+#     ],
+#     self_contained_lines: [...],
+#     extra_sections: [section("PROMO LINKING BASICS", [...])]
+#   )
+# end
+#
+# # User prompt built by string concatenation:
+# def build_promo_prompt(items, strict_language: false, comment_plan: nil)
+#   [
+#     <<~PRODUCT.strip,
+#     [PRODUCT]
+#     Domain: #{@url}
+#     PRODUCT
+#     pages_section,
+#     section("URL SELECTION", promo_url_selection_rules),
+#     section("PRODUCT MENTION", promo_product_mention_rules + [...]),
+#     comment_plan_section,
+#     comment_plan_rules,
+#     strict_lang_section,
+#     <<~ITEMS.strip
+#       [ITEMS]
+#       #{items.to_json}
+#     ITEMS
+#   ].compact.join("\n\n")
+# end
+# ```
+#
+# PROBLEMS:
+# - 200+ lines of string building spread across 8+ methods
+# - No validation on output — wrong language silently passes, caught later
+# - Persona + voice + rules mixed with data (URL, pages, items)
+# - Change one voice rule → no way to measure impact on output quality
+# - The `.compact.join("\n\n")` pattern is fragile — easy to break structure
+# =============================================================================
+# AFTER: ruby_llm-contract approach
+# =============================================================================
+# Persona text passed to the "PERSONA" prompt section of GeneratePromoComment.
+# Each line ends with a backslash, which continues the line inside the heredoc,
+# so the text is not broken by newlines at those points. `.strip` removes the
+# leading/trailing whitespace of the final string.
+PERSONA = <<~PERSONA.strip
+  You are a woman, 40+, a maker. You solve your own problems by building \
+  software. Outside of code you crochet, sew, 3D-print, and do \
+  astrophotography with your son. You hunt deals both offline and online. \
+  Your writing style: reflective, inventive, casual. You make typos, skip \
+  Polish diacritics sometimes, write like a real person on a forum.
+PERSONA
+class GeneratePromoComment < RubyLLM::Contract::Step::Base
+  input_type RubyLLM::Contract::Types::Hash.schema(
+    thread_title: RubyLLM::Contract::Types::String,
+    thread_selftext: RubyLLM::Contract::Types::String,
+    subreddit: RubyLLM::Contract::Types::String,
+    target_length: RubyLLM::Contract::Types::Integer,
+    thread_language: RubyLLM::Contract::Types::String,
+    product_url: RubyLLM::Contract::Types::String,
+    matched_page_url: RubyLLM::Contract::Types::String
+  )
+  output_type Hash
+  prompt do
+    system "You write Reddit comments that subtly promote a product. Return valid JSON only."
+    section "PERSONA", PERSONA
+    rule "Sound like a genuine user who found something useful, not an ad."
+    rule 'Never say "I built" or "I made this".'
+    rule "Casual tone, no marketing speak, no emojis, no bullet points."
+    rule "Pick one specific angle and share it concretely."
+    rule "Be opinionated; say what worked for you, not generic balanced advice."
+    rule 'NEVER start with "Nice X", "Cool X", "Love this". Jump straight into your point.'
+    rule "Give 2-3 options; the product link should be ONE of them, not the whole point."
+    rule "The comment must stand without the link."
+    rule 'Do not introduce the link with "PS:", "btw:", or parenthetical asides.'
+    rule "No markdown headers or formatting. Plain text only."
+    rule "Write in {thread_language}."
+    rule "Approximately {target_length} characters (±20%)."
+    section "PRODUCT", "Domain: {product_url}\nPage: {matched_page_url}"
+    user "r/{subreddit}: {thread_title}\n\n{thread_selftext}\n\nWrite a helpful comment."
+  end
+  validate("comment must not be empty") do |o|
+    o[:comment].is_a?(String) && o[:comment].strip.length > 10
+  end
+  validate("no markdown headers") do |o|
+    !o[:comment].to_s.match?(/^\#{2,}\s/)
+  end
+  validate("no emojis") do |o|
+    !o[:comment].to_s.match?(/[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}]/)
+  end
+  validate("includes product link") do |o, input|
+    o[:comment].to_s.include?(input[:matched_page_url])
+  end
+  validate("length within ±30% of target") do |o, input|
+    len = o[:comment].to_s.length
+    target = input[:target_length]
+    len.between?((target * 0.7).to_i, (target * 1.3).to_i)
+  end
+  validate("does not start with banned openings") do |o|
+    banned = ["Nice ", "Cool ", "Love this", "Great ", "Totally agree"]
+    banned.none? { |b| o[:comment].to_s.start_with?(b) }
+  end
+end
+# =============================================================================
+# DEMO: Run with test adapter
+# =============================================================================
+# Input hash with the seven keys declared in GeneratePromoComment's input_type;
+# reused for every run below.
+input = {
+  thread_title: "spent way too much on yarn this month lol",
+  thread_selftext: "Between Drops and the new Scheepjes line I'm broke. Anyone else track their spending?",
+  subreddit: "crochet",
+  target_length: 200,
+  thread_language: "en",
+  product_url: "https://deals.example.com",
+  matched_page_url: "https://deals.example.com/yarn-deals"
+}
+# Happy path — good comment
+good_comment = {
+  comment: "Ugh same. I started tracking last year and the numbers were brutal. " \
+           "What helped — monthly yarn budget plus checking https://deals.example.com/yarn-deals " \
+           "before impulse buying. Ravelry destash groups too."
+}.to_json
+adapter = RubyLLM::Contract::Adapters::Test.new(response: good_comment)
+result = GeneratePromoComment.run(input, context: { adapter: adapter })
+puts "=== HAPPY PATH ==="
+puts "Status: #{result.status}"
+puts "Comment: #{result.parsed_output[:comment]}"
+puts "Validation errors: #{result.validation_errors}"
+puts
+# Bad path — starts with banned opening
+# NOTE(review): this text also looks shorter than the 140-character lower
+# bound of the length validator (target 200), so more than one error may be
+# reported — confirm.
+bad_comment = {
+  comment: "Nice question! I track my yarn spending with a spreadsheet and also check " \
+           "https://deals.example.com/yarn-deals for sales."
+}.to_json
+bad_adapter = RubyLLM::Contract::Adapters::Test.new(response: bad_comment)
+result = GeneratePromoComment.run(input, context: { adapter: bad_adapter })
+puts "=== BANNED OPENING ==="
+puts "Status: #{result.status}"
+puts "Validation errors: #{result.validation_errors}"
+puts
+# Bad path — missing product link
+# NOTE(review): this text also appears to be just under 140 characters, so the
+# length validator may fail alongside the link check — confirm.
+no_link_comment = {
+  comment: "Same here. I started a spreadsheet and realized I spent way more than I thought. " \
+           "Ravelry destash groups are great for cheap yarn though."
+}.to_json
+no_link_adapter = RubyLLM::Contract::Adapters::Test.new(response: no_link_comment)
+result = GeneratePromoComment.run(input, context: { adapter: no_link_adapter })
+puts "=== MISSING LINK ==="
+puts "Status: #{result.status}"
+puts "Validation errors: #{result.validation_errors}"
+puts
+# Inspect the rendered prompt AST
+# Re-runs the happy path, then prints the role and the first 81 characters
+# (`[0..80]`) of the first three trace messages. "..." is appended
+# unconditionally, even when the content is shorter than that.
+puts "=== RENDERED PROMPT (first 3 messages) ==="
+adapter = RubyLLM::Contract::Adapters::Test.new(response: good_comment)
+result = GeneratePromoComment.run(input, context: { adapter: adapter })
+result.trace[:messages].first(3).each do |msg|
+  puts "  [#{msg[:role]}] #{msg[:content][0..80]}..."
+end

data/examples/03_target_audience.rb ADDED Viewed

@@ -0,0 +1,201 @@
+# frozen_string_literal: true
+# =============================================================================
+# EXAMPLE 3: Target Audience Generation
+#
+# Real-world case: Analyze a product URL and generate audience profiles.
+# This is stage 1 of the pipeline — if it fails, everything downstream
+# breaks (subreddit discovery, thread classification, comment generation).
+# =============================================================================
+require_relative "../lib/ruby_llm/contract"
+# =============================================================================
+# BEFORE: Legacy approach (prompt + schema in concern, ad-hoc validation)
+# =============================================================================
+#
+# In the legacy codebase, this is in target_audience_prompts.rb:
+#
+# ```ruby
+# def build_audience_profile_prompt(plan, pages)
+#   <<~PROMPT
+#     Analyze this webpage. First, understand what the product/service does.
+#     Then figure out who the TARGET AUDIENCE is.
+#
+#     #{product_input_context(plan, pages)}
+#
+#     ---
+#
+#     Generate:
+#     1. LOCALE: Detect the page language. Return ISO 639-1 code.
+#     2. DESCRIPTION: Write exactly 1 sentence (max 15 words): WHAT it is.
+#     3. Identify 2-3 distinct target audience groups.
+#
+#     CRITICAL: Describe groups by their LIFE SITUATION and EVERYDAY PROBLEMS...
+#     [... 40 more lines of instructions ...]
+#   PROMPT
+# end
+#
+# # Validation is ad-hoc, buried in the caller:
+# def valid_product_context?(context)
+#   context.is_a?(Hash) &&
+#     context["locale"].present? &&
+#     context["description"].present? &&
+#     context["groups"].is_a?(Array) &&
+#     context["groups"].size >= 1
+# end
+# ```
+#
+# PROBLEMS:
+# - 50-line heredoc string — impossible to diff meaningfully
+# - Validation is a separate method, easy to forget to call
+# - If locale is wrong (e.g. "english" instead of "en"), it passes validation
+# - If groups are present but empty/garbage, no way to catch it
+# - Failure here silently poisons all 6 downstream stages
+# =============================================================================
+# AFTER: ruby_llm-contract approach
+# =============================================================================
+class GenerateTargetAudience < RubyLLM::Contract::Step::Base
+  input_type RubyLLM::Contract::Types::Hash.schema(
+    url: RubyLLM::Contract::Types::String,
+    body_text: RubyLLM::Contract::Types::String,
+    sitemap_pages: RubyLLM::Contract::Types::Array.of(RubyLLM::Contract::Types::Hash)
+  )
+  output_type Hash
+  prompt do
+    system "Analyze a product webpage and generate target audience profiles."
+    rule "Detect page language, return ISO 639-1 code (e.g. 'en', 'pl', 'de')."
+    rule "Write product description in exactly 1 sentence, max 15 words. Say WHAT, not HOW."
+    rule "Identify 2-3 distinct audience groups based on LIFE SITUATION, not product jargon."
+    rule "Write 'who' as if YOU are that person posting on Reddit, not a marketer."
+    rule "Return JSON with locale, description, and groups array."
+    section "GOOD vs BAD EXAMPLES", <<~EXAMPLES
+      Good "who": "I'm 30, trying to lose weight but I hate counting calories"
+      Bad "who": "Adults 25-55 who buy specialty outdoor gear occasionally"
+      Good use_case: "I keep checking 5 different shops and it takes forever"
+      Bad use_case: "Track shop promotions across retailers"
+      Good thread: "spent way too much on yarn this month lol"
+      Bad thread: "budgeting for craft supplies"
+    EXAMPLES
+    user "URL: {url}\n\nBODY TEXT:\n{body_text}\n\nSITEMAP PAGES:\n{sitemap_pages}"
+  end
+  validate("locale is valid ISO 639-1") do |o|
+    o[:locale].is_a?(String) && o[:locale].match?(/\A[a-z]{2}\z/)
+  end
+  validate("description is present and concise") do |o|
+    desc = o[:description].to_s.strip
+    desc.length > 5 && desc.split.size <= 20
+  end
+  validate("has 1-4 audience groups") do |o|
+    o[:groups].is_a?(Array) && o[:groups].size.between?(1, 4)
+  end
+  validate("each group has who field") do |o|
+    o[:groups].is_a?(Array) && o[:groups].all? { |g| g[:who].to_s.strip.length > 10 }
+  end
+  validate("each group has use_cases") do |o|
+    o[:groups].is_a?(Array) && o[:groups].all? { |g| g[:use_cases].is_a?(Array) && g[:use_cases].size >= 2 }
+  end
+  validate("each group has good_fit_threads") do |o|
+    o[:groups].is_a?(Array) && o[:groups].all? do |g|
+      g[:good_fit_threads].is_a?(Array) && g[:good_fit_threads].size >= 2
+    end
+  end
+end
+# =============================================================================
+# DEMO: Run with test adapter — showing cascade failure prevention
+# =============================================================================
+# Input hash with the :url, :body_text and :sitemap_pages keys declared in
+# GenerateTargetAudience's input_type; reused for every run below.
+input = {
+  url: "https://deals.example.com",
+  body_text: "Track deals from niche online shops. Get alerts for price drops on craft supplies, " \
+             "hobby gear, and specialty items. We monitor 200+ small retailers daily.",
+  sitemap_pages: [
+    { url: "/yarn-deals", title: "Yarn & Crochet Deals", description: "Sales on yarn, hooks, patterns" },
+    { url: "/gaming-deals", title: "Gaming Merch Deals", description: "Gaming accessories and merch sales" }
+  ]
+}
+# Happy path — good audience profile
+# Two groups, each with a long :who and two entries in :use_cases and
+# :good_fit_threads. The :not_covered and :bad_fit_threads keys are not read
+# by any validator.
+good_response = {
+  locale: "en",
+  description: "Deals aggregator for niche online shops.",
+  groups: [
+    {
+      who: "I'm a crafter who spends too much on supplies every month and my partner is getting annoyed.",
+      use_cases: ["I keep checking 5 different yarn shops", "I always find out about sales after they end"],
+      not_covered: ["Groceries and food delivery", "Air travel"],
+      good_fit_threads: ["spent way too much on yarn this month lol", "anyone else feel guilty about hobby spending?"],
+      bad_fit_threads: ["best grocery cashback apps", "cheap flight deals"]
+    },
+    {
+      who: "I'm a gamer building my setup on a budget and I hate paying full price for peripherals.",
+      use_cases: ["I want to know when my wishlist items go on sale",
+                  "Small shops have better deals but I can't check them all"],
+      not_covered: ["Digital game keys", "Streaming subscriptions"],
+      good_fit_threads: ["just got into minipainting and my wallet hurts", "budget gaming setup thread"],
+      bad_fit_threads: ["best game pass deals", "Netflix vs Disney+"]
+    }
+  ]
+}.to_json
+adapter = RubyLLM::Contract::Adapters::Test.new(response: good_response)
+result = GenerateTargetAudience.run(input, context: { adapter: adapter })
+puts "=== HAPPY PATH ==="
+puts "Status: #{result.status}"
+puts "Locale: #{result.parsed_output[:locale]}"
+puts "Description: #{result.parsed_output[:description]}"
+puts "Groups: #{result.parsed_output[:groups].size}"
+puts "Validation errors: #{result.validation_errors}"
+puts
+# Bad path — invalid locale (legacy code would let "english" pass)
+# Besides the locale, the single group here also falls short of the other
+# per-group checks: :who is 9 characters (needs > 10) and :use_cases /
+# :good_fit_threads have one entry each (need >= 2).
+bad_locale_response = {
+  locale: "english", # Should be "en", not "english"
+  description: "Deals aggregator for niche online shops.",
+  groups: [{ who: "A crafter", use_cases: ["buying yarn"], not_covered: [], good_fit_threads: ["yarn deals"],
+             bad_fit_threads: [] }]
+}.to_json
+bad_adapter = RubyLLM::Contract::Adapters::Test.new(response: bad_locale_response)
+result = GenerateTargetAudience.run(input, context: { adapter: bad_adapter })
+puts "=== BAD LOCALE (legacy code would let this pass) ==="
+puts "Status: #{result.status}"
+puts "Validation errors: #{result.validation_errors}"
+puts
+# Bad path — empty/garbage groups (cascade failure source)
+# An empty :groups array fails the 1-4 groups size check; the per-group checks
+# pass vacuously because `all?` is true for an empty array.
+empty_groups_response = {
+  locale: "en",
+  description: "A website.",
+  groups: []
+}.to_json
+empty_adapter = RubyLLM::Contract::Adapters::Test.new(response: empty_groups_response)
+result = GenerateTargetAudience.run(input, context: { adapter: empty_adapter })
+puts "=== EMPTY GROUPS (would poison all downstream stages) ==="
+puts "Status: #{result.status}"
+puts "Validation errors: #{result.validation_errors}"
+puts
+# The key insight: in a pipeline, you check result.ok? before proceeding
+# The lines below only print explanatory text; no further step is run.
+puts "=== CASCADE PREVENTION ==="
+puts "if result.failed? → don't run SearchExpansion, ThreadClassification, CommentGeneration"
+puts "Legacy code would silently pass bad data to 6 more LLM calls, wasting tokens and producing garbage."