RubyGems - ruby_llm-contract - Versions diffs - 0.7.0 → 0.7.3 - Mend

ruby_llm-contract 0.7.0 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +66 -0
data/Gemfile.lock +2 -2
data/README.md +51 -252
data/examples/00_basics.rb +110 -428
data/examples/01_fallback_showcase.rb +208 -0
data/examples/02_real_llm_minimal.rb +45 -0
data/examples/03_summarize_with_keywords.rb +128 -0
data/examples/04_summarize_and_translate.rb +196 -0
data/examples/05_eval_dataset.rb +144 -0
data/examples/06_retry_variants.rb +147 -0
data/examples/README.md +20 -128
data/lib/ruby_llm/contract/eval/model_comparison.rb +4 -4
data/lib/ruby_llm/contract/eval/retry_optimizer.rb +7 -3
data/lib/ruby_llm/contract/step/base.rb +28 -12
data/lib/ruby_llm/contract/version.rb +1 -1
metadata +7 -10
data/examples/01_classify_threads.rb +0 -220
data/examples/02_generate_comment.rb +0 -203
data/examples/03_target_audience.rb +0 -201
data/examples/04_real_llm.rb +0 -410
data/examples/05_output_schema.rb +0 -258
data/examples/07_keyword_extraction.rb +0 -239
data/examples/08_translation.rb +0 -353
data/examples/09_eval_dataset.rb +0 -287
data/examples/10_reddit_full_showcase.rb +0 -363

data/examples/00_basics.rb CHANGED Viewed

@@ -1,500 +1,182 @@
 # frozen_string_literal: true
 # =============================================================================
-# EXAMPLE 0: From zero to ruby_llm-contract
+# EXAMPLE 0: SummarizeArticle — from plain prompt to full contract
 #
-# Starting from the simplest case — a plain string prompt —
-# and adding one layer at a time.
+# One step, eight incremental layers. Each layer adds exactly one capability
+# and shows the line of code that unlocks it. Start at Step 1, read top to
+# bottom, stop at the layer that matches your project.
 # =============================================================================
 require_relative "../lib/ruby_llm/contract"
-# Setup: test adapter returns canned responses (no real LLM needed)
-RubyLLM::Contract.configure do |config|
-  config.default_adapter = RubyLLM::Contract::Adapters::Test.new(
-    response: '{"sentiment": "positive"}'
-  )
-end
-# =============================================================================
-# STEP 1: Simplest possible step — plain string prompt
-#
-# BEFORE (typical Rails code):
-#
-#   prompt = "Classify the sentiment of this text as positive, negative, or neutral. Return JSON."
-#   response = OpenAI::Client.new.chat(messages: [{role: "user", content: prompt + "\n\n" + text}])
-#   JSON.parse(response.dig("choices", 0, "message", "content"))
-#
-# Or with ruby_llm (one-liner, but still no validation):
-#
-#   RubyLLM.chat.ask("Classify the sentiment: #{text}")
-#
-# PROBLEM: no validation, no types, no trace, no structure
-# =============================================================================
-# Option A: with output_schema (recommended — simplest)
-class SimpleSentiment < RubyLLM::Contract::Step::Base
-  input_type String # plain Ruby class works!
-  output_schema do
-    string :sentiment
-  end
-  prompt do
-    user "Classify the sentiment of this text as positive, negative, or neutral. Return JSON.\n\n{input}"
-  end
-end
-result = SimpleSentiment.run("I love this product!")
-result.status        # => :ok
-result.parsed_output # => {sentiment: "positive"}
-# Option B: with output_type (plain Ruby class — JSON parsing is implicit for Hash)
-class SimpleSentimentDryTypes < RubyLLM::Contract::Step::Base
-  input_type  String
-  output_type Hash
+ARTICLE = <<~ARTICLE
+  Ruby 3.4 ships with frozen string literals on by default, measurable YJIT
+  speedups on Rails workloads, and tightened Warning.warn category filtering.
+  Parser fixes and faster keyword argument handling land alongside.
+ARTICLE
+CANNED = {
+  tldr: "Ruby 3.4 brings frozen string literals by default, YJIT speedups, parser fixes.",
+  takeaways: [
+    "Frozen string literals are the default in Ruby 3.4",
+    "YJIT delivers measurable Rails speedups",
+    "Parser fixes and keyword argument handling improve"
+  ],
+  tone: "analytical"
+}.freeze
-  prompt do
-    user "Classify the sentiment of this text as positive, negative, or neutral. Return JSON.\n\n{input}"
-  end
-end
-result = SimpleSentimentDryTypes.run("I love this product!")
-result.status        # => :ok
-result.parsed_output # => {sentiment: "positive"}
-# =============================================================================
-# STEP 2: Add system message — separate instructions from data
-#
-# BEFORE:
-#   Everything in one string — instructions and data mixed together
-#
-# AFTER:
-#   system = instructions (constant)
-#   user = data (variable)
-# =============================================================================
-class SentimentWithSystem < RubyLLM::Contract::Step::Base
-  input_type String
-  output_schema do
-    string :sentiment
-  end
-  prompt do
-    system "Classify the sentiment of the user's text."
-    user "{input}"
-  end
+RubyLLM::Contract.configure do |c|
+  c.default_adapter = RubyLLM::Contract::Adapters::Test.new(response: CANNED)
 end
-result = SentimentWithSystem.run("I love this product!")
-result.status        # => :ok
-result.parsed_output # => {sentiment: "positive"}
 # =============================================================================
-# STEP 3: Add rules — clear instructions for the model
-#
-# Rules are individual requirements. One rule per concern.
-# Much clearer than a single wall of text.
+# STEP 1 — Minimal: prompt + output_schema
+# The step enforces JSON shape. No business rules yet, no retry.
 # =============================================================================
-RubyLLM::Contract.configure do |c|
-  c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
-    response: '{"sentiment": "positive", "confidence": 0.95}'
-  )
-end
+class SummarizeArticleMinimal < RubyLLM::Contract::Step::Base
+  prompt <<~PROMPT
+    Summarize this article for a UI card. Return a short TL;DR,
+    3 to 5 key takeaways, and a tone label.
-class SentimentWithRules < RubyLLM::Contract::Step::Base
-  input_type String
+    {input}
+  PROMPT
   output_schema do
-    string :sentiment, enum: %w[positive negative neutral]
-    number :confidence, minimum: 0.0, maximum: 1.0
-  end
-  prompt do
-    system "You are a sentiment classifier."
-    rule "Return JSON only."
-    rule "Use exactly one of: positive, negative, neutral."
-    rule "Include a confidence score from 0.0 to 1.0."
-    user "{input}"
+    string :tldr
+    array  :takeaways, of: :string, min_items: 3, max_items: 5
+    string :tone, enum: %w[neutral positive negative analytical]
   end
 end
-result = SentimentWithRules.run("I love this product!")
-result.status        # => :ok
-result.parsed_output # => {sentiment: "positive", confidence: 0.95}
+r = SummarizeArticleMinimal.run(ARTICLE)
+r.status        # => :ok
+r.parsed_output # => {tldr: "...", takeaways: [...], tone: "analytical"}
 # =============================================================================
-# STEP 4: Add invariants — custom business logic on top of schema
-#
-# Schema handles structure (enums, ranges). Invariants handle logic
-# that schema can't express: conditional rules, cross-field checks, etc.
+# STEP 2 — Add a business rule (validate) that schema cannot express
+# Schema says "takeaways is an array of 3–5 strings". It says nothing about
+# uniqueness or whether the TL;DR fits the card. That is what validate blocks are for.
 # =============================================================================
-class SentimentValidated < RubyLLM::Contract::Step::Base
-  input_type String
-  output_schema do
-    string :sentiment, enum: %w[positive negative neutral]
-    number :confidence, minimum: 0.0, maximum: 1.0
-  end
-  prompt do
-    system "You are a sentiment classifier."
-    rule "Return JSON only."
-    rule "Use exactly one of: positive, negative, neutral."
-    rule "Include a confidence score from 0.0 to 1.0."
-    user "{input}"
-  end
-  # Schema already enforces enum + range. Validate adds custom logic:
-  validate("high confidence required for extreme sentiments") do |o|
-    next true unless %w[positive negative].include?(o[:sentiment])
-    o[:confidence] >= 0.7
-  end
+class SummarizeArticleValidated < SummarizeArticleMinimal
+  validate("TL;DR fits the card")  { |o, _| o[:tldr].length <= 200 }
+  validate("takeaways are unique") { |o, _| o[:takeaways].uniq.size == o[:takeaways].size }
 end
-# Happy path:
-result = SentimentValidated.run("I love this product!")
-result.status        # => :ok
-result.parsed_output # => {sentiment: "positive", confidence: 0.95}
-# Model returns low confidence for extreme sentiment — invariant catches it:
-RubyLLM::Contract.configure do |c|
-  c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
-    response: '{"sentiment": "positive", "confidence": 0.3}'
-  )
-end
-result = SentimentValidated.run("I love this product!")
-result.status            # => :validation_failed
-result.validation_errors # => ["high confidence required for extreme sentiments"]
-# Model returns non-JSON:
-RubyLLM::Contract.configure do |c|
-  c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
-    response: "I think it's positive"
-  )
-end
-result = SentimentValidated.run("I love this product!")
-result.status            # => :parse_error
-result.validation_errors # => ["Failed to parse JSON: ..."]
+r = SummarizeArticleValidated.run(ARTICLE)
+r.status             # => :ok
+r.validation_errors  # => []
 # =============================================================================
-# STEP 5: Add examples — show the model what you expect
-#
-# Few-shot: provide example input → output pairs.
-# The model better understands the expected format.
+# STEP 3 — Structured prompt (prompt AST: system, rule, section, user)
+# Replaces a heredoc. Individual nodes are reorderable, diffable, and
+# inspectable — useful when the prompt grows beyond a few lines.
 # =============================================================================
-RubyLLM::Contract.configure do |c|
-  c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
-    response: '{"sentiment": "positive", "confidence": 0.92}'
-  )
-end
-class SentimentWithExample < RubyLLM::Contract::Step::Base
-  input_type String
-  output_schema do
-    string :sentiment, enum: %w[positive negative neutral]
-    number :confidence, minimum: 0.0, maximum: 1.0
-  end
+class SummarizeArticleStructured < RubyLLM::Contract::Step::Base
   prompt do
-    system "You are a sentiment classifier."
-    rule "Return JSON only."
-    rule "Use exactly one of: positive, negative, neutral."
-    rule "Include a confidence score from 0.0 to 1.0."
-    example input: "This is terrible", output: '{"sentiment": "negative", "confidence": 0.9}'
-    example input: "It works fine I guess", output: '{"sentiment": "neutral", "confidence": 0.6}'
-    user "{input}"
+    system "You summarize articles for a UI card."
+    rule   "Return valid JSON only."
+    rule   "Keep the TL;DR under 200 characters."
+    user   "{input}"
   end
-end
-result = SentimentWithExample.run("I love this product!")
-result.status        # => :ok
-result.parsed_output # => {sentiment: "positive", confidence: 0.92}
-# =============================================================================
-# STEP 6: Sections — replace heredoc string with structured AST
-#
-# BEFORE (typical heredoc prompt — one big string):
-#
-#   prompt = <<~PROMPT                                          # AFTER:
-#     You are a sentiment classifier for customer support.      # system "You are a sentiment classifier for customer support."
-#     Return JSON with sentiment, confidence, and reason.       # rule "Return JSON with sentiment, confidence, and reason."
-#                                                               #
-#     [CONTEXT]                                                 # section "CONTEXT",
-#     We sell software for freelancers.                         #   "We sell software for freelancers."
-#                                                               #
-#     [SCORING GUIDE]                                           # section "SCORING GUIDE",
-#     negative = complaint or frustration                       #   "negative = complaint or frustration\n
-#     positive = praise or thanks                               #    positive = praise or thanks\n
-#     neutral = question or factual statement                   #    neutral = question or factual statement"
-#                                                               #
-#     Classify this: #{text}                                    # user "Classify this: {input}"
-#   PROMPT                                                      #
-#
-# PROBLEM: one big string — can't reorder, diff, or reuse individual sections
-# AFTER: each part is a separate node in the prompt AST
-# =============================================================================
-RubyLLM::Contract.configure do |c|
-  c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
-    response: '{"sentiment": "negative", "confidence": 0.85, "reason": "product complaint"}'
-  )
-end
-class SentimentWithSections < RubyLLM::Contract::Step::Base
-  input_type String
   output_schema do
-    string :sentiment, enum: %w[positive negative neutral]
-    number :confidence, minimum: 0.0, maximum: 1.0
-    string :reason
-  end
-  prompt do
-    system "You are a sentiment classifier for customer support."
-    rule "Return JSON with sentiment, confidence, and reason."
-    section "CONTEXT", "We sell software for freelancers."
-    section "SCORING GUIDE",
-            "negative = complaint or frustration\npositive = praise or thanks\nneutral = question or factual statement"
-    user "Classify this: {input}"
+    string :tldr
+    array  :takeaways, of: :string, min_items: 3, max_items: 5
+    string :tone, enum: %w[neutral positive negative analytical]
   end
 end
-result = SentimentWithSections.run("Your billing page is broken again!")
-result.status        # => :ok
-result.parsed_output # => {sentiment: "negative", confidence: 0.85, reason: "product complaint"}
 # =============================================================================
-# STEP 7: Hash input — multiple fields with auto-interpolation
-#
-# When input is a Hash, each key becomes a template variable.
-# {title} resolves to input[:title], {language} to input[:language], etc.
-# No manual string building needed.
+# STEP 4 — Hash input with variable interpolation
+# When you need more than raw text (audience, language, tenant), take a Hash
+# and reference its keys directly in the prompt.
 # =============================================================================
-RubyLLM::Contract.configure do |c|
-  c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
-    response: '{"category": "billing", "priority": "high"}'
-  )
-end
-class ClassifyTicket < RubyLLM::Contract::Step::Base
+class SummarizeArticleMultiField < RubyLLM::Contract::Step::Base
   input_type RubyLLM::Contract::Types::Hash.schema(
-    title: RubyLLM::Contract::Types::String,
-    body: RubyLLM::Contract::Types::String,
+    article:  RubyLLM::Contract::Types::String,
+    audience: RubyLLM::Contract::Types::String,
     language: RubyLLM::Contract::Types::String
   )
-  output_schema do
-    string :category, enum: %w[billing technical feature_request other]
-    string :priority, enum: %w[low medium high urgent]
+  prompt do
+    system  "You summarize articles for a UI card."
+    rule    "Write the TL;DR and takeaways in {language}."
+    section "AUDIENCE", "{audience}"
+    user    "{article}"
   end
-  prompt do
-    system "You classify customer support tickets."
-    rule "Return JSON with category and priority."
-    rule "Respond in {language}."
-    user "Title: {title}\n\nBody: {body}"
+  output_schema do
+    string :tldr
+    array  :takeaways, of: :string, min_items: 3, max_items: 5
+    string :tone, enum: %w[neutral positive negative analytical]
   end
 end
-result = ClassifyTicket.run(
-  { title: "Can't update credit card", body: "Payment page gives error 500", language: "en" }
-)
-result.status        # => :ok
-result.parsed_output # => {category: "billing", priority: "high"}
 # =============================================================================
-# STEP 8: 2-arity invariants — validate output against input
-#
-# Sometimes you need to check that the output is consistent with the input.
-# A 2-arity invariant receives both |output, input| so you can cross-validate.
+# STEP 5 — 2-arity validate: check the output against the input
+# Catches "lazy" models that echo the article verbatim into the TL;DR.
+# The block receives |output, input|, so the check can compare the two directly.
 # =============================================================================
-RubyLLM::Contract.configure do |c|
-  c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
-    response: '{"translation": "Bonjour le monde", "source_lang": "en", "target_lang": "fr"}'
-  )
-end
-class Translate < RubyLLM::Contract::Step::Base
-  input_type RubyLLM::Contract::Types::Hash.schema(
-    text: RubyLLM::Contract::Types::String,
-    target_lang: RubyLLM::Contract::Types::String
-  )
-  output_schema do
-    string :translation, min_length: 1
-    string :source_lang
-    string :target_lang
-  end
-  prompt do
-    system "Translate the text to the target language."
-    rule "Return JSON with translation, source_lang, and target_lang."
-    user "Translate to {target_lang}: {text}"
-  end
-  # Schema handles: translation non-empty, all fields present
-  # 2-arity validate: cross-validate output against input
-  validate("target_lang must match requested language") do |output, input|
-    output[:target_lang] == input[:target_lang]
+class SummarizeArticleFaithful < SummarizeArticleValidated
+  validate("TL;DR is shorter than the article") do |output, input|
+    output[:tldr].length < input.length / 2
   end
 end
-result = Translate.run({ text: "Hello world", target_lang: "fr" })
-result.status        # => :ok
-result.parsed_output # => {translation: "Bonjour le monde", source_lang: "en", target_lang: "fr"}
-# What if model returns wrong target language?
-RubyLLM::Contract.configure do |c|
-  c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
-    response: '{"translation": "Hola mundo", "source_lang": "en", "target_lang": "es"}'
-  )
-end
-result = Translate.run({ text: "Hello world", target_lang: "fr" })
-result.status            # => :validation_failed
-result.validation_errors # => ["target_lang must match requested language"]
 # =============================================================================
-# STEP 9: Context override — per-run adapter and model
-#
-# Global config sets defaults. You can override per call via context.
-# Useful for: testing, switching models, A/B testing prompts.
+# STEP 6 — Retry with model fallback
+# Start on the cheapest model. If validate or schema rejects the output,
+# the gem automatically retries on the next model in the list.
 # =============================================================================
-RubyLLM::Contract.configure do |c|
-  c.default_adapter = RubyLLM::Contract::Adapters::Test.new(response: '{"sentiment": "positive"}')
-  c.default_model = "gpt-4.1-mini"
+class SummarizeArticleWithRetry < SummarizeArticleValidated
+  retry_policy models: %w[gpt-5-nano gpt-5-mini gpt-5]
 end
-# Uses global defaults:
-result = SimpleSentiment.run("I love this product!")
-result.status        # => :ok
-result.trace[:model] # => "gpt-4.1-mini"
-# Override adapter and model for this specific call:
-other_adapter = RubyLLM::Contract::Adapters::Test.new(response: '{"sentiment": "neutral"}')
-result = SimpleSentiment.run("I love this product!", context: { adapter: other_adapter, model: "gpt-5" })
-result.status          # => :ok
-result.parsed_output   # => {sentiment: "neutral"}
-result.trace[:model]   # => "gpt-5"
 # =============================================================================
-# STEP 10: StepResult — everything you get back from a run
-#
-# Every .run() returns a StepResult with status, output, errors, and trace.
+# STEP 7 — Inspect the Result: status, parsed_output, trace, per-attempt details
+# Every run returns a value object with everything you need to log, debug,
+# or surface in an admin UI.
 # =============================================================================
-adapter = RubyLLM::Contract::Adapters::Test.new(response: '{"sentiment": "positive", "confidence": 0.92}')
-result = SentimentValidated.run("I love this product!", context: { adapter: adapter, model: "gpt-4.1-mini" })
-result.status            # => :ok
-result.ok?               # => true
-result.failed?           # => false
-result.raw_output        # => '{"sentiment": "positive", "confidence": 0.92}'
-result.parsed_output     # => {sentiment: "positive", confidence: 0.92}
-result.validation_errors # => []
-result.trace[:model]     # => "gpt-4.1-mini"
-result.trace[:latency_ms] # => 0     (instant with test adapter)
-result.trace[:messages] # => [{role: :system, content: "..."}, {role: :user, content: "..."}]
-# On failure, you still get everything for debugging:
-bad_adapter = RubyLLM::Contract::Adapters::Test.new(response: '{"sentiment": "positive", "confidence": 0.1}')
-result = SentimentValidated.run("I love this product!", context: { adapter: bad_adapter })
-result.status            # => :validation_failed
-result.ok?               # => false
-result.failed?           # => true
-result.raw_output        # => '{"sentiment": "positive", "confidence": 0.1}'
-result.parsed_output     # => {sentiment: "positive", confidence: 0.1}
-result.validation_errors # => ["high confidence required for extreme sentiments"]
+r = SummarizeArticleWithRetry.run(ARTICLE)
+r.status             # => :ok
+r.ok?                # => true
+r.parsed_output      # => {tldr: "...", takeaways: [...], tone: "analytical"}
+r.validation_errors  # => []
+r.trace[:model]      # => "gpt-5-nano"   (first model that passed)
+r.trace[:attempts]   # => [{attempt: 1, model: "gpt-5-nano", status: :ok, ...}]
+r.trace[:cost]       # => sum of per-attempt costs
 # =============================================================================
-# STEP 11: Pipeline — chain multiple steps with fail-fast
+# STEP 8 — Swap the Test adapter for a real LLM
+# The step itself does not change. Point ruby_llm at your provider and
+# pass Adapters::RubyLLM.new in context.
 #
-# Pipeline::Base composes steps into a sequence.
-# Output of step N automatically becomes input to step N+1.
-# If any step fails, execution halts immediately.
+# See examples/02_real_llm_minimal.rb for a runnable ~30-line version.
 # =============================================================================
-# Step A: classify sentiment
-class PipelineSentiment < RubyLLM::Contract::Step::Base
-  input_type String
-  output_schema do
-    string :text
-    string :sentiment, enum: %w[positive negative neutral]
-  end
-  prompt do
-    system "Classify sentiment and return the original text."
-    user "{input}"
-  end
-end
-# Step B: generate a response based on sentiment
-class PipelineRespond < RubyLLM::Contract::Step::Base
-  input_type Hash
-  output_schema do
-    string :response
-    string :tone
-  end
-  prompt do
-    system "Generate a customer support response matching the sentiment."
-    user "Text: {text}\nSentiment: {sentiment}"
-  end
-end
-# Pipeline: sentiment → respond
-class SupportPipeline < RubyLLM::Contract::Pipeline::Base
-  step PipelineSentiment, as: :classify
-  step PipelineRespond,   as: :respond
-end
-# Happy path:
-RubyLLM::Contract.configure do |c|
-  c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
-    response: '{"text": "I love this product!", "sentiment": "positive"}'
-  )
-end
-# NOTE: with Test adapter, both steps get the same canned response.
-# With a real LLM, each step would get a different response.
-result = SupportPipeline.run("I love this product!")
-result.ok?                            # => true
-result.outputs_by_step[:classify]     # => {text: "I love this product!", sentiment: "positive"}
-result.outputs_by_step[:respond]      # => {text: "I love this product!", sentiment: "positive"}
-result.step_results.length            # => 2
+#   RubyLLM.configure { |c| c.openai_api_key = ENV.fetch("OPENAI_API_KEY") }
+#   adapter = RubyLLM::Contract::Adapters::RubyLLM.new
+#   result  = SummarizeArticleWithRetry.run(article_text, context: { adapter: adapter })
+#
+# Switch provider per call — ruby_llm resolves the provider from the model name:
+#   SummarizeArticleWithRetry.run(article_text, context: { adapter: adapter, model: "claude-sonnet-4-6" })
+#   SummarizeArticleWithRetry.run(article_text, context: { adapter: adapter, model: "gemma3:4b" })  # local Ollama
 # =============================================================================
-# SUMMARY
-#
-# Step 1:  user "{input}"                   — plain string, nothing else
-# Step 2:  system + user                    — separate instructions from data
-# Step 3:  + output_schema                  — declarative output structure
-# Step 4:  + invariants                     — custom business logic on top
-# Step 5:  + examples                       — few-shot
-# Step 6:  + sections                       — labeled context blocks
-# Step 7:  Hash input                       — multiple fields, auto-interpolation
-# Step 8:  2-arity invariants               — cross-validate output vs input
-# Step 9:  context override                 — per-run adapter and model
-# Step 10: StepResult                       — full status, output, errors, trace
-# Step 11: Pipeline                         — chain steps with fail-fast
+# Where to go next
 #
-# Each step adds one layer. Use as many as you need.
-# Even Step 1 gives you: typed input, JSON parsing, and trace.
+# 01_fallback_showcase.rb       — see the retry loop run in 30 seconds
+# 02_real_llm_minimal.rb        — swap Test adapter for Adapters::RubyLLM
+# 03_summarize_with_keywords.rb — growing prompt: add a keywords field
+# 04_summarize_and_translate.rb — pipeline: summarize → translate → review
+# 05_eval_dataset.rb            — define_eval, add_case, regression detection
+# 06_retry_variants.rb          — attempts: 3, reasoning_effort, cross-provider
 # =============================================================================