RubyGems - ruby_llm-contract - Versions diffs - 0.7.1 → 0.8.0 - Mend

ruby_llm-contract 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +96 -0
data/Gemfile.lock +3 -3
data/README.md +64 -316
data/examples/00_basics.rb +110 -428
data/examples/01_fallback_showcase.rb +208 -0
data/examples/02_real_llm_minimal.rb +45 -0
data/examples/03_summarize_with_keywords.rb +128 -0
data/examples/04_summarize_and_translate.rb +196 -0
data/examples/05_eval_dataset.rb +144 -0
data/examples/06_retry_variants.rb +147 -0
data/examples/README.md +20 -128
data/lib/ruby_llm/contract/adapters/ruby_llm.rb +22 -1
data/lib/ruby_llm/contract/cost_calculator.rb +39 -0
data/lib/ruby_llm/contract/eval/model_comparison.rb +4 -4
data/lib/ruby_llm/contract/eval/retry_optimizer.rb +7 -3
data/lib/ruby_llm/contract/step/base.rb +18 -1
data/lib/ruby_llm/contract/step/dsl.rb +38 -0
data/lib/ruby_llm/contract/step/limit_checker.rb +2 -2
data/lib/ruby_llm/contract/token_estimator.rb +20 -3
data/lib/ruby_llm/contract/version.rb +1 -1
data/ruby_llm-contract.gemspec +6 -5
metadata +14 -16
data/examples/01_classify_threads.rb +0 -220
data/examples/02_generate_comment.rb +0 -203
data/examples/03_target_audience.rb +0 -201
data/examples/04_real_llm.rb +0 -410
data/examples/05_output_schema.rb +0 -258
data/examples/07_keyword_extraction.rb +0 -239
data/examples/08_translation.rb +0 -353
data/examples/09_eval_dataset.rb +0 -287
data/examples/10_reddit_full_showcase.rb +0 -363

data/examples/05_output_schema.rb DELETED Viewed

@@ -1,258 +0,0 @@
-# frozen_string_literal: true
-# =============================================================================
-# EXAMPLE 5: Declarative output schema (ruby_llm-schema)
-#
-# Replace manual invariants with a schema DSL.
-# The schema is sent to the LLM provider for structured output enforcement.
-#
-# With Test adapter: schema defines expectations, parsing is auto-inferred.
-# With RubyLLM adapter: schema is also enforced server-side by the provider.
-# =============================================================================
-require_relative "../lib/ruby_llm/contract"
-# =============================================================================
-# STEP 1: BEFORE — legacy approach with output_type + manual invariants
-#
-# Every enum, range, and required field is a separate invariant.
-# Works, but verbose. This is what you'd write WITHOUT output_schema.
-# =============================================================================
-RubyLLM::Contract.configure do |c|
-  c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
-    response: '{"intent": "sales", "confidence": 0.95}'
-  )
-end
-class ClassifyIntentBefore < RubyLLM::Contract::Step::Base
-  input_type  String
-  output_type Hash
-  prompt do
-    system "Classify the user's intent."
-    rule "Return JSON only."
-    user "{input}"
-  end
-  validate("must include intent") { |o| o[:intent].to_s != "" }
-  validate("intent must be allowed") { |o| %w[sales support billing].include?(o[:intent]) }
-  validate("confidence must be a number") { |o| o[:confidence].is_a?(Numeric) }
-  validate("confidence in range") { |o| o[:confidence]&.between?(0.0, 1.0) }
-end
-result = ClassifyIntentBefore.run("I want to buy")
-result.status        # => :ok
-result.parsed_output # => {intent: "sales", confidence: 0.95}
-# =============================================================================
-# STEP 2: AFTER — output_schema replaces structural invariants
-#
-# Same constraints, but declared as a schema.
-# No `output_type`, no `parse :json`, no structural invariants.
-# =============================================================================
-class ClassifyIntentAfter < RubyLLM::Contract::Step::Base
-  input_type String
-  output_schema do
-    string :intent, enum: %w[sales support billing]
-    number :confidence, minimum: 0.0, maximum: 1.0
-  end
-  prompt do
-    system "Classify the user's intent."
-    rule "Return JSON only."
-    user "{input}"
-  end
-end
-result = ClassifyIntentAfter.run("I want to buy")
-result.status        # => :ok
-result.parsed_output # => {intent: "sales", confidence: 0.95}
-# =============================================================================
-# STEP 3: Schema + invariants — best of both worlds
-#
-# Schema handles structure (types, enums, ranges).
-# Invariants handle business logic (cross-validation, conditionals).
-# =============================================================================
-RubyLLM::Contract.configure do |c|
-  c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
-    response: '{"category": "account", "priority": "urgent", "summary": "Projects disappeared"}'
-  )
-end
-class AnalyzeTicket < RubyLLM::Contract::Step::Base
-  input_type RubyLLM::Contract::Types::Hash.schema(
-    title: RubyLLM::Contract::Types::String,
-    body: RubyLLM::Contract::Types::String
-  )
-  output_schema do
-    string :category, enum: %w[billing technical feature_request account other]
-    string :priority, enum: %w[low medium high urgent]
-    string :summary, description: "One-sentence summary"
-  end
-  prompt do
-    system "Analyze support tickets."
-    rule "Return JSON with category, priority, and summary."
-    user "Title: {title}\n\nBody: {body}"
-  end
-  # Schema handles: valid category, valid priority, summary present
-  # Validate handles: cross-validation with input
-  validate("urgent requires justification") do |output, input|
-    next true unless output[:priority] == "urgent"
-    body = input[:body].downcase
-    body.include?("data loss") || body.include?("security") || body.include?("deleted")
-  end
-end
-# Justified urgent:
-result = AnalyzeTicket.run({
-                             title: "Projects disappeared",
-                             body: "All my projects are gone. This is a data loss emergency."
-                           })
-result.status # => :ok
-# Unjustified urgent:
-RubyLLM::Contract.configure do |c|
-  c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
-    response: '{"category": "technical", "priority": "urgent", "summary": "Page is slow"}'
-  )
-end
-result = AnalyzeTicket.run({
-                             title: "Slow page",
-                             body: "Dashboard takes 5 seconds to load."
-                           })
-result.status            # => :validation_failed
-result.validation_errors # => ["urgent requires justification"]
-# =============================================================================
-# STEP 4: Complex schema — nested objects, arrays, optional fields
-# =============================================================================
-RubyLLM::Contract.configure do |c|
-  c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
-    response: '{"locale": "en", "groups": [{"who": "Freelancers", "pain_points": ["invoicing", "time tracking"]}]}'
-  )
-end
-class ProfileAudience < RubyLLM::Contract::Step::Base
-  input_type RubyLLM::Contract::Types::Hash.schema(
-    product: RubyLLM::Contract::Types::String,
-    market: RubyLLM::Contract::Types::String
-  )
-  output_schema do
-    string :locale, description: "ISO 639-1 language code"
-    array :groups, min_items: 1, max_items: 4 do
-      string :who, description: "Audience segment name"
-      array :pain_points, of: :string, min_items: 1
-    end
-  end
-  prompt do
-    system "Generate target audience profiles."
-    user "Product: {product}, Market: {market}"
-  end
-end
-result = ProfileAudience.run({ product: "InvoiceApp", market: "US freelancers" })
-result.status        # => :ok
-result.parsed_output # => {locale: "en", groups: [{who: "Freelancers", pain_points: [...]}]}
-# =============================================================================
-# STEP 5: Schema is provider-agnostic
-#
-# With Test adapter: schema auto-infers JSON parsing, no provider enforcement.
-# With RubyLLM adapter: schema is ALSO sent to provider (structured output).
-# The step definition doesn't change.
-# =============================================================================
-# To use with a real LLM and get provider-side enforcement:
-#
-#   RubyLLM.configure { |c| c.openai_api_key = ENV["OPENAI_API_KEY"] }
-#   adapter = RubyLLM::Contract::Adapters::RubyLLM.new
-#   result = ClassifyIntentAfter.run("I want to buy",
-#     context: { adapter: adapter, model: "gpt-4.1-mini" })
-#
-# The provider enforces the schema — the model MUST return valid JSON
-# matching the schema. Parse errors become nearly impossible.
-# =============================================================================
-# STEP 6: Pipeline with schemas — each step has its own schema
-#
-# Pipeline + output_schema = fully typed, provider-enforced multi-step flow.
-# Each step declares its output schema. One step's output is passed on as the next step's input.
-# =============================================================================
-RubyLLM::Contract.configure do |c|
-  c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
-    response: '{"category": "billing", "priority": "high", "summary": "Payment page broken"}'
-  )
-end
-class TriageTicket < RubyLLM::Contract::Step::Base
-  input_type RubyLLM::Contract::Types::Hash.schema(title: RubyLLM::Contract::Types::String, body: RubyLLM::Contract::Types::String)
-  output_schema do
-    string :category, enum: %w[billing technical feature_request account other]
-    string :priority, enum: %w[low medium high urgent]
-    string :summary
-  end
-  prompt do
-    system "Triage support ticket."
-    user "Title: {title}\nBody: {body}"
-  end
-end
-class SuggestAction < RubyLLM::Contract::Step::Base
-  input_type Hash
-  output_schema do
-    string :action
-    string :team, enum: %w[engineering support billing product]
-    boolean :escalate
-  end
-  prompt do
-    system "Suggest action for a triaged ticket."
-    user "Category: {category}, Priority: {priority}, Summary: {summary}"
-  end
-end
-class TicketPipeline < RubyLLM::Contract::Pipeline::Base
-  step TriageTicket,  as: :triage
-  step SuggestAction, as: :action
-end
-# Both steps share the test adapter, so they get the same canned response.
-# With a real LLM, step 2 would get a different response based on step 1's output.
-result = TicketPipeline.run(
-  { title: "Payment page broken", body: "Error 500 on checkout" }
-)
-result.ok?                        # => true
-result.outputs_by_step[:triage]   # => {category: "billing", priority: "high", summary: "..."}
-result.outputs_by_step[:action]   # => same canned response (test adapter)
-result.step_results.length        # => 2
-# =============================================================================
-# SUMMARY
-#
-# Step 1: BEFORE — output_type + manual structural invariants
-# Step 2: AFTER — output_schema replaces all of that
-# Step 3: Schema + invariants — schema for structure, invariants for logic
-# Step 4: Complex schemas — nested objects, arrays, constraints
-# Step 5: Provider-agnostic — same schema works with Test and RubyLLM
-# Step 6: Pipeline + schemas — fully typed multi-step composition
-#
-# output_schema is optional. Existing steps with output_type + invariants
-# continue to work unchanged.
-# =============================================================================

data/examples/07_keyword_extraction.rb DELETED Viewed

@@ -1,239 +0,0 @@
-# frozen_string_literal: true
-# =============================================================================
-# EXAMPLE 7: Keyword Extraction with probability scoring
-#
-# One article in, up to 15 keywords out — each with a relevance
-# probability. Schema enforces structure (array bounds, number range).
-# Invariants enforce logic (sorted, no duplicates, keywords from text).
-#
-# Shows:
-#   - Array output_schema with nested objects
-#   - min_items / max_items constraints
-#   - number range (probability 0.0–1.0)
-#   - Invariant: sorted order (schema can't express this)
-#   - Invariant: uniqueness (schema can't express this)
-#   - Invariant: cross-validation — at least 70% of keywords must appear in the source text
-#   - retry_policy for model escalation
-# =============================================================================
-require_relative "../lib/ruby_llm/contract"
-# =============================================================================
-# STEP DEFINITION
-# =============================================================================
-class ExtractKeywords < RubyLLM::Contract::Step::Base
-  input_type String
-  output_schema do
-    array :keywords, min_items: 1, max_items: 15 do
-      string :keyword, description: "1-3 word keyword or phrase"
-      number :probability, minimum: 0.0, maximum: 1.0
-    end
-  end
-  prompt do
-    system "Extract the most relevant keywords from the article."
-    rule "Return up to 15 keywords, each with a relevance probability (0.0 to 1.0)."
-    rule "Sort by probability descending (most relevant first)."
-    rule "Each keyword must be 1-3 words."
-    rule "Keywords must actually appear in or directly relate to the text."
-    example input: "Ruby on Rails is a web framework written in Ruby.",
-            output: '{"keywords":[{"keyword":"Ruby on Rails","probability":0.95},{"keyword":"web framework","probability":0.85},{"keyword":"Ruby","probability":0.75}]}'
-    user "{input}"
-  end
-  validate("sorted by probability descending") do |o|
-    probs = o[:keywords].map { |k| k[:probability] }
-    probs == probs.sort.reverse
-  end
-  validate("no duplicate keywords") do |o|
-    words = o[:keywords].map { |k| k[:keyword].downcase.strip }
-    words.uniq.length == words.length
-  end
-  validate("keywords relate to source text") do |output, input|
-    text = input.downcase
-    matches = output[:keywords].count { |k| text.include?(k[:keyword].downcase) }
-    matches >= (output[:keywords].length * 0.7).ceil
-  end
-  retry_policy models: %w[gpt-4.1-nano gpt-4.1-mini]
-end
-# =============================================================================
-# TEST WITH CANNED RESPONSES
-# =============================================================================
-article = <<~ARTICLE
-  Artificial intelligence is transforming the way developers build software.
-  Machine learning models, particularly large language models like GPT and Claude,
-  are being integrated into development workflows for code generation, testing,
-  and documentation. Ruby developers are adopting gems like ruby_llm to interact
-  with these models through a clean API. The challenge remains in ensuring output
-  quality — without contracts and validation, LLM responses can hallucinate or
-  produce structurally invalid data that breaks downstream systems.
-ARTICLE
-puts "=" * 60
-puts "KEYWORD EXTRACTION"
-puts "=" * 60
-# Happy path — good keywords
-good_response = {
-  keywords: [
-    { keyword: "artificial intelligence", probability: 0.95 },
-    { keyword: "machine learning", probability: 0.90 },
-    { keyword: "large language models", probability: 0.88 },
-    { keyword: "Ruby developers", probability: 0.82 },
-    { keyword: "code generation", probability: 0.78 },
-    { keyword: "output quality", probability: 0.72 },
-    { keyword: "ruby_llm", probability: 0.70 },
-    { keyword: "LLM responses", probability: 0.65 },
-    { keyword: "validation", probability: 0.60 }
-  ]
-}.to_json
-adapter = RubyLLM::Contract::Adapters::Test.new(response: good_response)
-result = ExtractKeywords.run(article, context: { adapter: adapter })
-puts "\n--- Happy path ---"
-puts "Status: #{result.status}"
-result.parsed_output[:keywords].each do |k|
-  bar = "#" * (k[:probability] * 20).round
-  puts "  #{k[:probability].to_s.ljust(5)} #{bar.ljust(20)} #{k[:keyword]}"
-end
-# Bad: unsorted probabilities
-puts "\n--- Invariant catches: unsorted ---"
-unsorted = {
-  keywords: [
-    { keyword: "Ruby", probability: 0.60 },
-    { keyword: "AI", probability: 0.95 },
-    { keyword: "testing", probability: 0.80 }
-  ]
-}.to_json
-r2 = ExtractKeywords.run(article, context: { adapter: RubyLLM::Contract::Adapters::Test.new(response: unsorted) })
-puts "Status: #{r2.status}"
-puts "Errors: #{r2.validation_errors}"
-# Bad: duplicate keywords
-puts "\n--- Invariant catches: duplicates ---"
-dupes = {
-  keywords: [
-    { keyword: "machine learning", probability: 0.95 },
-    { keyword: "Machine Learning", probability: 0.90 },
-    { keyword: "AI", probability: 0.80 }
-  ]
-}.to_json
-r3 = ExtractKeywords.run(article, context: { adapter: RubyLLM::Contract::Adapters::Test.new(response: dupes) })
-puts "Status: #{r3.status}"
-puts "Errors: #{r3.validation_errors}"
-# Bad: hallucinated keywords not in text
-puts "\n--- Invariant catches: hallucinated keywords ---"
-hallucinated = {
-  keywords: [
-    { keyword: "blockchain", probability: 0.95 },
-    { keyword: "cryptocurrency", probability: 0.90 },
-    { keyword: "NFT marketplace", probability: 0.85 },
-    { keyword: "artificial intelligence", probability: 0.80 }
-  ]
-}.to_json
-r4 = ExtractKeywords.run(article, context: { adapter: RubyLLM::Contract::Adapters::Test.new(response: hallucinated) })
-puts "Status: #{r4.status}"
-puts "Errors: #{r4.validation_errors}"
-# =============================================================================
-# PIPELINE: Article → Keywords → Related Topics
-# =============================================================================
-puts "\n\n#{"=" * 60}"
-puts "PIPELINE: Article → Keywords → Related Topics"
-puts "=" * 60
-class SuggestRelatedTopics < RubyLLM::Contract::Step::Base
-  input_type Hash
-  output_schema do
-    array :topics, min_items: 3, max_items: 5 do
-      string :title
-      string :angle, description: "Unique angle or hook for the topic"
-    end
-  end
-  prompt do
-    system "Suggest related article topics based on the extracted keywords."
-    rule "Each topic must have a unique angle, not just repeat the keywords."
-    rule "Topics should be interesting to the same audience."
-    user "Keywords: {keywords}"
-  end
-  validate("topics have unique titles") do |o|
-    titles = o[:topics].map { |t| t[:title].downcase }
-    titles.uniq.length == titles.length
-  end
-  validate("angles are substantive") do |o|
-    o[:topics].all? { |t| t[:angle].to_s.split.length >= 5 }
-  end
-end
-class ArticlePipeline < RubyLLM::Contract::Pipeline::Base
-  step ExtractKeywords,      as: :keywords
-  step SuggestRelatedTopics, as: :topics
-end
-topics_response = {
-  topics: [
-    { title: "Building LLM-Powered Ruby Gems",
-      angle: "How to structure a Ruby gem that wraps LLM APIs with type safety" },
-    { title: "Contract-First AI Development",
-      angle: "Why treating LLM outputs like API responses improves reliability" },
-    { title: "Testing AI Features Without API Calls",
-      angle: "Deterministic testing patterns for LLM integrations using canned adapters" }
-  ]
-}.to_json
-adapter_kw = RubyLLM::Contract::Adapters::Test.new(response: good_response)
-adapter_tp = RubyLLM::Contract::Adapters::Test.new(response: topics_response)
-# Run steps individually (different adapters per step)
-r_kw = ExtractKeywords.run(article, context: { adapter: adapter_kw })
-r_tp = SuggestRelatedTopics.run(r_kw.parsed_output, context: { adapter: adapter_tp })
-puts "\nKeywords → Topics pipeline:"
-puts "  Keywords: #{r_kw.parsed_output[:keywords].length} extracted"
-puts "  Topics:"
-r_tp.parsed_output[:topics].each do |t|
-  puts "    #{t[:title]}"
-  puts "      → #{t[:angle]}"
-end
-# =============================================================================
-# SUMMARY
-#
-# Schema handles:
-#   - Array with 1-15 items (min_items, max_items)
-#   - Each item has keyword (string) + probability (number 0.0-1.0)
-#
-# Invariants handle:
-#   - Sorted by probability (schema can't express ordering)
-#   - No duplicates (schema can't express uniqueness)
-#   - Keywords from source text (schema can't see input)
-#
-# Pipeline:
-#   - Extract keywords → suggest related topics
-#   - Each step has its own schema + invariants
-#
-# Model escalation:
-#   - retry_policy models: %w[gpt-4.1-nano gpt-4.1-mini]
-#   - If gpt-4.1-nano returns unsorted or hallucinated keywords, gpt-4.1-mini retries
-# =============================================================================