ruby_llm-contract 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +96 -0
- data/Gemfile.lock +3 -3
- data/README.md +64 -316
- data/examples/00_basics.rb +110 -428
- data/examples/01_fallback_showcase.rb +208 -0
- data/examples/02_real_llm_minimal.rb +45 -0
- data/examples/03_summarize_with_keywords.rb +128 -0
- data/examples/04_summarize_and_translate.rb +196 -0
- data/examples/05_eval_dataset.rb +144 -0
- data/examples/06_retry_variants.rb +147 -0
- data/examples/README.md +20 -128
- data/lib/ruby_llm/contract/adapters/ruby_llm.rb +22 -1
- data/lib/ruby_llm/contract/cost_calculator.rb +39 -0
- data/lib/ruby_llm/contract/eval/model_comparison.rb +4 -4
- data/lib/ruby_llm/contract/eval/retry_optimizer.rb +7 -3
- data/lib/ruby_llm/contract/step/base.rb +18 -1
- data/lib/ruby_llm/contract/step/dsl.rb +38 -0
- data/lib/ruby_llm/contract/step/limit_checker.rb +2 -2
- data/lib/ruby_llm/contract/token_estimator.rb +20 -3
- data/lib/ruby_llm/contract/version.rb +1 -1
- data/ruby_llm-contract.gemspec +6 -5
- metadata +14 -16
- data/examples/01_classify_threads.rb +0 -220
- data/examples/02_generate_comment.rb +0 -203
- data/examples/03_target_audience.rb +0 -201
- data/examples/04_real_llm.rb +0 -410
- data/examples/05_output_schema.rb +0 -258
- data/examples/07_keyword_extraction.rb +0 -239
- data/examples/08_translation.rb +0 -353
- data/examples/09_eval_dataset.rb +0 -287
- data/examples/10_reddit_full_showcase.rb +0 -363
|
@@ -1,201 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
# =============================================================================
|
|
4
|
-
# EXAMPLE 3: Target Audience Generation
|
|
5
|
-
#
|
|
6
|
-
# Real-world case: Analyze a product URL and generate audience profiles.
|
|
7
|
-
# This is stage 1 of the pipeline — if it fails, everything downstream
|
|
8
|
-
# breaks (subreddit discovery, thread classification, comment generation).
|
|
9
|
-
# =============================================================================
|
|
10
|
-
|
|
11
|
-
require_relative "../lib/ruby_llm/contract"
|
|
12
|
-
|
|
13
|
-
# =============================================================================
|
|
14
|
-
# BEFORE: Legacy approach (prompt + schema in concern, ad-hoc validation)
|
|
15
|
-
# =============================================================================
|
|
16
|
-
#
|
|
17
|
-
# In the legacy codebase, this is in target_audience_prompts.rb:
|
|
18
|
-
#
|
|
19
|
-
# ```ruby
|
|
20
|
-
# def build_audience_profile_prompt(plan, pages)
|
|
21
|
-
# <<~PROMPT
|
|
22
|
-
# Analyze this webpage. First, understand what the product/service does.
|
|
23
|
-
# Then figure out who the TARGET AUDIENCE is.
|
|
24
|
-
#
|
|
25
|
-
# #{product_input_context(plan, pages)}
|
|
26
|
-
#
|
|
27
|
-
# ---
|
|
28
|
-
#
|
|
29
|
-
# Generate:
|
|
30
|
-
# 1. LOCALE: Detect the page language. Return ISO 639-1 code.
|
|
31
|
-
# 2. DESCRIPTION: Write exactly 1 sentence (max 15 words): WHAT it is.
|
|
32
|
-
# 3. Identify 2-3 distinct target audience groups.
|
|
33
|
-
#
|
|
34
|
-
# CRITICAL: Describe groups by their LIFE SITUATION and EVERYDAY PROBLEMS...
|
|
35
|
-
# [... 40 more lines of instructions ...]
|
|
36
|
-
# PROMPT
|
|
37
|
-
# end
|
|
38
|
-
#
|
|
39
|
-
# # Validation is ad-hoc, buried in the caller:
|
|
40
|
-
# def valid_product_context?(context)
|
|
41
|
-
# context.is_a?(Hash) &&
|
|
42
|
-
# context["locale"].present? &&
|
|
43
|
-
# context["description"].present? &&
|
|
44
|
-
# context["groups"].is_a?(Array) &&
|
|
45
|
-
# context["groups"].size >= 1
|
|
46
|
-
# end
|
|
47
|
-
# ```
|
|
48
|
-
#
|
|
49
|
-
# PROBLEMS:
|
|
50
|
-
# - 50-line heredoc string — impossible to diff meaningfully
|
|
51
|
-
# - Validation is a separate method, easy to forget to call
|
|
52
|
-
# - If locale is wrong (e.g. "english" instead of "en"), it passes validation
|
|
53
|
-
# - If groups are present but empty/garbage, no way to catch it
|
|
54
|
-
# - Failure here silently poisons all 6 downstream stages
|
|
55
|
-
|
|
56
|
-
# =============================================================================
|
|
57
|
-
# AFTER: ruby_llm-contract approach
|
|
58
|
-
# =============================================================================
|
|
59
|
-
|
|
60
|
-
class GenerateTargetAudience < RubyLLM::Contract::Step::Base
|
|
61
|
-
input_type RubyLLM::Contract::Types::Hash.schema(
|
|
62
|
-
url: RubyLLM::Contract::Types::String,
|
|
63
|
-
body_text: RubyLLM::Contract::Types::String,
|
|
64
|
-
sitemap_pages: RubyLLM::Contract::Types::Array.of(RubyLLM::Contract::Types::Hash)
|
|
65
|
-
)
|
|
66
|
-
output_type Hash
|
|
67
|
-
|
|
68
|
-
prompt do
|
|
69
|
-
system "Analyze a product webpage and generate target audience profiles."
|
|
70
|
-
|
|
71
|
-
rule "Detect page language, return ISO 639-1 code (e.g. 'en', 'pl', 'de')."
|
|
72
|
-
rule "Write product description in exactly 1 sentence, max 15 words. Say WHAT, not HOW."
|
|
73
|
-
rule "Identify 2-3 distinct audience groups based on LIFE SITUATION, not product jargon."
|
|
74
|
-
rule "Write 'who' as if YOU are that person posting on Reddit, not a marketer."
|
|
75
|
-
rule "Return JSON with locale, description, and groups array."
|
|
76
|
-
|
|
77
|
-
section "GOOD vs BAD EXAMPLES", <<~EXAMPLES
|
|
78
|
-
Good "who": "I'm 30, trying to lose weight but I hate counting calories"
|
|
79
|
-
Bad "who": "Adults 25-55 who buy specialty outdoor gear occasionally"
|
|
80
|
-
|
|
81
|
-
Good use_case: "I keep checking 5 different shops and it takes forever"
|
|
82
|
-
Bad use_case: "Track shop promotions across retailers"
|
|
83
|
-
|
|
84
|
-
Good thread: "spent way too much on yarn this month lol"
|
|
85
|
-
Bad thread: "budgeting for craft supplies"
|
|
86
|
-
EXAMPLES
|
|
87
|
-
|
|
88
|
-
user "URL: {url}\n\nBODY TEXT:\n{body_text}\n\nSITEMAP PAGES:\n{sitemap_pages}"
|
|
89
|
-
end
|
|
90
|
-
|
|
91
|
-
validate("locale is valid ISO 639-1") do |o|
|
|
92
|
-
o[:locale].is_a?(String) && o[:locale].match?(/\A[a-z]{2}\z/)
|
|
93
|
-
end
|
|
94
|
-
|
|
95
|
-
validate("description is present and concise") do |o|
|
|
96
|
-
desc = o[:description].to_s.strip
|
|
97
|
-
desc.length > 5 && desc.split.size <= 20
|
|
98
|
-
end
|
|
99
|
-
|
|
100
|
-
validate("has 1-4 audience groups") do |o|
|
|
101
|
-
o[:groups].is_a?(Array) && o[:groups].size.between?(1, 4)
|
|
102
|
-
end
|
|
103
|
-
|
|
104
|
-
validate("each group has who field") do |o|
|
|
105
|
-
o[:groups].is_a?(Array) && o[:groups].all? { |g| g[:who].to_s.strip.length > 10 }
|
|
106
|
-
end
|
|
107
|
-
|
|
108
|
-
validate("each group has use_cases") do |o|
|
|
109
|
-
o[:groups].is_a?(Array) && o[:groups].all? { |g| g[:use_cases].is_a?(Array) && g[:use_cases].size >= 2 }
|
|
110
|
-
end
|
|
111
|
-
|
|
112
|
-
validate("each group has good_fit_threads") do |o|
|
|
113
|
-
o[:groups].is_a?(Array) && o[:groups].all? do |g|
|
|
114
|
-
g[:good_fit_threads].is_a?(Array) && g[:good_fit_threads].size >= 2
|
|
115
|
-
end
|
|
116
|
-
end
|
|
117
|
-
end
|
|
118
|
-
|
|
119
|
-
# =============================================================================
|
|
120
|
-
# DEMO: Run with test adapter — showing cascade failure prevention
|
|
121
|
-
# =============================================================================
|
|
122
|
-
|
|
123
|
-
input = {
|
|
124
|
-
url: "https://deals.example.com",
|
|
125
|
-
body_text: "Track deals from niche online shops. Get alerts for price drops on craft supplies, " \
|
|
126
|
-
"hobby gear, and specialty items. We monitor 200+ small retailers daily.",
|
|
127
|
-
sitemap_pages: [
|
|
128
|
-
{ url: "/yarn-deals", title: "Yarn & Crochet Deals", description: "Sales on yarn, hooks, patterns" },
|
|
129
|
-
{ url: "/gaming-deals", title: "Gaming Merch Deals", description: "Gaming accessories and merch sales" }
|
|
130
|
-
]
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
# Happy path — good audience profile
|
|
134
|
-
good_response = {
|
|
135
|
-
locale: "en",
|
|
136
|
-
description: "Deals aggregator for niche online shops.",
|
|
137
|
-
groups: [
|
|
138
|
-
{
|
|
139
|
-
who: "I'm a crafter who spends too much on supplies every month and my partner is getting annoyed.",
|
|
140
|
-
use_cases: ["I keep checking 5 different yarn shops", "I always find out about sales after they end"],
|
|
141
|
-
not_covered: ["Groceries and food delivery", "Air travel"],
|
|
142
|
-
good_fit_threads: ["spent way too much on yarn this month lol", "anyone else feel guilty about hobby spending?"],
|
|
143
|
-
bad_fit_threads: ["best grocery cashback apps", "cheap flight deals"]
|
|
144
|
-
},
|
|
145
|
-
{
|
|
146
|
-
who: "I'm a gamer building my setup on a budget and I hate paying full price for peripherals.",
|
|
147
|
-
use_cases: ["I want to know when my wishlist items go on sale",
|
|
148
|
-
"Small shops have better deals but I can't check them all"],
|
|
149
|
-
not_covered: ["Digital game keys", "Streaming subscriptions"],
|
|
150
|
-
good_fit_threads: ["just got into minipainting and my wallet hurts", "budget gaming setup thread"],
|
|
151
|
-
bad_fit_threads: ["best game pass deals", "Netflix vs Disney+"]
|
|
152
|
-
}
|
|
153
|
-
]
|
|
154
|
-
}.to_json
|
|
155
|
-
|
|
156
|
-
adapter = RubyLLM::Contract::Adapters::Test.new(response: good_response)
|
|
157
|
-
result = GenerateTargetAudience.run(input, context: { adapter: adapter })
|
|
158
|
-
|
|
159
|
-
puts "=== HAPPY PATH ==="
|
|
160
|
-
puts "Status: #{result.status}"
|
|
161
|
-
puts "Locale: #{result.parsed_output[:locale]}"
|
|
162
|
-
puts "Description: #{result.parsed_output[:description]}"
|
|
163
|
-
puts "Groups: #{result.parsed_output[:groups].size}"
|
|
164
|
-
puts "Validation errors: #{result.validation_errors}"
|
|
165
|
-
puts
|
|
166
|
-
|
|
167
|
-
# Bad path — invalid locale (legacy code would let "english" pass)
|
|
168
|
-
bad_locale_response = {
|
|
169
|
-
locale: "english", # Should be "en", not "english"
|
|
170
|
-
description: "Deals aggregator for niche online shops.",
|
|
171
|
-
groups: [{ who: "A crafter", use_cases: ["buying yarn"], not_covered: [], good_fit_threads: ["yarn deals"],
|
|
172
|
-
bad_fit_threads: [] }]
|
|
173
|
-
}.to_json
|
|
174
|
-
|
|
175
|
-
bad_adapter = RubyLLM::Contract::Adapters::Test.new(response: bad_locale_response)
|
|
176
|
-
result = GenerateTargetAudience.run(input, context: { adapter: bad_adapter })
|
|
177
|
-
|
|
178
|
-
puts "=== BAD LOCALE (legacy code would let this pass) ==="
|
|
179
|
-
puts "Status: #{result.status}"
|
|
180
|
-
puts "Validation errors: #{result.validation_errors}"
|
|
181
|
-
puts
|
|
182
|
-
|
|
183
|
-
# Bad path — empty/garbage groups (cascade failure source)
|
|
184
|
-
empty_groups_response = {
|
|
185
|
-
locale: "en",
|
|
186
|
-
description: "A website.",
|
|
187
|
-
groups: []
|
|
188
|
-
}.to_json
|
|
189
|
-
|
|
190
|
-
empty_adapter = RubyLLM::Contract::Adapters::Test.new(response: empty_groups_response)
|
|
191
|
-
result = GenerateTargetAudience.run(input, context: { adapter: empty_adapter })
|
|
192
|
-
|
|
193
|
-
puts "=== EMPTY GROUPS (would poison all downstream stages) ==="
|
|
194
|
-
puts "Status: #{result.status}"
|
|
195
|
-
puts "Validation errors: #{result.validation_errors}"
|
|
196
|
-
puts
|
|
197
|
-
|
|
198
|
-
# The key insight: in a pipeline, you check result.ok? before proceeding
|
|
199
|
-
puts "=== CASCADE PREVENTION ==="
|
|
200
|
-
puts "if result.failed? → don't run SearchExpansion, ThreadClassification, CommentGeneration"
|
|
201
|
-
puts "Legacy code would silently pass bad data to 6 more LLM calls, wasting tokens and producing garbage."
|
data/examples/04_real_llm.rb
DELETED
|
@@ -1,410 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
# =============================================================================
|
|
4
|
-
# EXAMPLE 4: Real LLM calls via ruby_llm
|
|
5
|
-
#
|
|
6
|
-
# All previous examples used Adapters::Test with canned responses.
|
|
7
|
-
# This example shows how to connect to a real LLM provider
|
|
8
|
-
# (OpenAI, Anthropic, Google, etc.) using Adapters::RubyLLM.
|
|
9
|
-
#
|
|
10
|
-
# REQUIREMENTS:
|
|
11
|
-
# gem install ruby_llm
|
|
12
|
-
# export OPENAI_API_KEY=sk-... # or any provider key
|
|
13
|
-
#
|
|
14
|
-
# RUN:
|
|
15
|
-
# ruby examples/04_real_llm.rb
|
|
16
|
-
# =============================================================================
|
|
17
|
-
|
|
18
|
-
require_relative "../lib/ruby_llm/contract"
|
|
19
|
-
|
|
20
|
-
# =============================================================================
|
|
21
|
-
# STEP 1: Configure — single block, API key auto-creates adapter
|
|
22
|
-
#
|
|
23
|
-
# Just set your API key. The adapter is created automatically.
|
|
24
|
-
# =============================================================================
|
|
25
|
-
|
|
26
|
-
RubyLLM::Contract.configure do |config|
|
|
27
|
-
config.openai_api_key = ENV.fetch("OPENAI_API_KEY", nil)
|
|
28
|
-
# config.anthropic_api_key = ENV.fetch("ANTHROPIC_API_KEY", nil)
|
|
29
|
-
config.default_model = "gpt-4.1-mini"
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
# =============================================================================
|
|
33
|
-
# STEP 3: Define a step — identical to what you'd write with Test adapter
|
|
34
|
-
#
|
|
35
|
-
# The step doesn't know or care which adapter runs it.
|
|
36
|
-
# Same types, same prompt, same contract.
|
|
37
|
-
# =============================================================================
|
|
38
|
-
|
|
39
|
-
class ClassifyIntent < RubyLLM::Contract::Step::Base
|
|
40
|
-
input_type String
|
|
41
|
-
|
|
42
|
-
output_schema do
|
|
43
|
-
string :intent, enum: %w[sales support billing other]
|
|
44
|
-
number :confidence, minimum: 0.0, maximum: 1.0
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
prompt do
|
|
48
|
-
system "You are an intent classifier for a customer support system."
|
|
49
|
-
rule "Return JSON only, no markdown."
|
|
50
|
-
example input: "I want to upgrade my plan",
|
|
51
|
-
output: '{"intent": "sales", "confidence": 0.95}'
|
|
52
|
-
example input: "My invoice is wrong",
|
|
53
|
-
output: '{"intent": "billing", "confidence": 0.9}'
|
|
54
|
-
user "{input}"
|
|
55
|
-
end
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
# =============================================================================
|
|
59
|
-
# STEP 4: Run it — real LLM call, full contract enforcement
|
|
60
|
-
#
|
|
61
|
-
# The adapter sends the prompt to the real model.
|
|
62
|
-
# The contract validates the response just like with Test adapter.
|
|
63
|
-
# You get real token usage in the trace.
|
|
64
|
-
# =============================================================================
|
|
65
|
-
|
|
66
|
-
puts "Calling LLM..."
|
|
67
|
-
result = ClassifyIntent.run("I can't log in to my account")
|
|
68
|
-
|
|
69
|
-
puts "Status: #{result.status}" # => :ok
|
|
70
|
-
puts "Output: #{result.parsed_output}" # => {intent: "support", confidence: 0.95}
|
|
71
|
-
puts "Model: #{result.trace[:model]}" # => "gpt-4.1-mini"
|
|
72
|
-
puts "Latency: #{result.trace[:latency_ms]}ms" # => 823ms (real network time)
|
|
73
|
-
puts "Tokens: #{result.trace[:usage]}" # => {input_tokens: 142, output_tokens: 18}
|
|
74
|
-
|
|
75
|
-
if result.ok?
|
|
76
|
-
puts "Intent: #{result.parsed_output[:intent]}"
|
|
77
|
-
else
|
|
78
|
-
puts "FAILED: #{result.validation_errors}"
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
# =============================================================================
|
|
82
|
-
# STEP 5: Override model per call — A/B test different models
|
|
83
|
-
#
|
|
84
|
-
# Use context to try different models without changing the step definition.
|
|
85
|
-
# =============================================================================
|
|
86
|
-
|
|
87
|
-
puts "\n--- Comparing models ---"
|
|
88
|
-
|
|
89
|
-
%w[gpt-4.1-mini gpt-4.1-nano].each do |model|
|
|
90
|
-
r = ClassifyIntent.run("I need a refund", context: { model: model })
|
|
91
|
-
puts "#{model}: #{r.parsed_output} (#{r.trace[:latency_ms]}ms, #{r.trace[:usage]})"
|
|
92
|
-
end
|
|
93
|
-
|
|
94
|
-
# =============================================================================
|
|
95
|
-
# STEP 6: Control generation params — temperature, max_tokens
|
|
96
|
-
#
|
|
97
|
-
# Options are forwarded to ruby_llm. Lower temperature = more deterministic.
|
|
98
|
-
# =============================================================================
|
|
99
|
-
|
|
100
|
-
puts "\n--- With temperature 0 ---"
|
|
101
|
-
result = ClassifyIntent.run(
|
|
102
|
-
"Do you have an enterprise plan?",
|
|
103
|
-
context: { model: "gpt-4.1-mini", temperature: 0.0, max_tokens: 50 }
|
|
104
|
-
)
|
|
105
|
-
puts "Output: #{result.parsed_output}"
|
|
106
|
-
|
|
107
|
-
# =============================================================================
|
|
108
|
-
# STEP 7: Same step, different provider — just change the model
|
|
109
|
-
#
|
|
110
|
-
# If you have an Anthropic key configured, you can switch with one line.
|
|
111
|
-
# The prompt, contract, and invariants are provider-agnostic.
|
|
112
|
-
# =============================================================================
|
|
113
|
-
|
|
114
|
-
# Uncomment if you have an Anthropic key:
|
|
115
|
-
# puts "\n--- Anthropic ---"
|
|
116
|
-
# result = ClassifyIntent.run(
|
|
117
|
-
# "I want to cancel my subscription",
|
|
118
|
-
# context: { model: "claude-sonnet-4-6" }
|
|
119
|
-
# )
|
|
120
|
-
# puts "Output: #{result.parsed_output}"
|
|
121
|
-
|
|
122
|
-
# =============================================================================
|
|
123
|
-
# STEP 8: Error handling — what happens when things go wrong
|
|
124
|
-
#
|
|
125
|
-
# Contract enforcement works the same with real LLM responses.
|
|
126
|
-
# If the model returns something invalid, you get a clear error.
|
|
127
|
-
# =============================================================================
|
|
128
|
-
|
|
129
|
-
class StrictClassifier < RubyLLM::Contract::Step::Base
|
|
130
|
-
input_type String
|
|
131
|
-
|
|
132
|
-
output_schema do
|
|
133
|
-
string :sentiment, enum: %w[positive negative neutral]
|
|
134
|
-
end
|
|
135
|
-
|
|
136
|
-
prompt do
|
|
137
|
-
system "Classify the sentiment."
|
|
138
|
-
user "{input}"
|
|
139
|
-
end
|
|
140
|
-
end
|
|
141
|
-
|
|
142
|
-
puts "\n--- Strict classifier ---"
|
|
143
|
-
result = StrictClassifier.run("This product is amazing!", context: { model: "gpt-4.1-mini" })
|
|
144
|
-
|
|
145
|
-
if result.ok?
|
|
146
|
-
puts "Passed: #{result.parsed_output}"
|
|
147
|
-
else
|
|
148
|
-
puts "Failed: #{result.status} — #{result.validation_errors}"
|
|
149
|
-
puts "Raw: #{result.raw_output}"
|
|
150
|
-
end
|
|
151
|
-
|
|
152
|
-
# =============================================================================
|
|
153
|
-
# STEP 9: Full power — every prompt feature combined with a real LLM
|
|
154
|
-
#
|
|
155
|
-
# This step uses EVERY feature from 00_basics.rb in a single definition:
|
|
156
|
-
# - system message (main instruction)
|
|
157
|
-
# - rules (individual requirements)
|
|
158
|
-
# - sections (labeled context blocks)
|
|
159
|
-
# - examples (few-shot input/output pairs)
|
|
160
|
-
# - Hash input (multi-field auto-interpolation)
|
|
161
|
-
# - 1-arity invariants (validate output alone)
|
|
162
|
-
# - 2-arity invariants (cross-validate output against input)
|
|
163
|
-
#
|
|
164
|
-
# All of it running against a real LLM.
|
|
165
|
-
# =============================================================================
|
|
166
|
-
|
|
167
|
-
class AnalyzeTicket < RubyLLM::Contract::Step::Base
|
|
168
|
-
input_type RubyLLM::Contract::Types::Hash.schema(
|
|
169
|
-
title: RubyLLM::Contract::Types::String,
|
|
170
|
-
body: RubyLLM::Contract::Types::String,
|
|
171
|
-
product: RubyLLM::Contract::Types::String,
|
|
172
|
-
customer_tier: RubyLLM::Contract::Types::String
|
|
173
|
-
)
|
|
174
|
-
output_type Hash
|
|
175
|
-
|
|
176
|
-
prompt do
|
|
177
|
-
system "You are a support ticket analyzer for a SaaS company."
|
|
178
|
-
|
|
179
|
-
rule "Return JSON only, no markdown, no explanation."
|
|
180
|
-
rule "Include all required fields: category, priority, sentiment, summary, suggested_action."
|
|
181
|
-
rule "Categories: billing, technical, feature_request, account, other."
|
|
182
|
-
rule "Priorities: low, medium, high, urgent."
|
|
183
|
-
rule "Sentiments: positive, negative, neutral, frustrated."
|
|
184
|
-
rule "Summary must be one sentence, max 100 characters."
|
|
185
|
-
|
|
186
|
-
section "PRODUCT CONTEXT", "Product: {product}\nCustomer tier: {customer_tier}"
|
|
187
|
-
|
|
188
|
-
section "PRIORITY RULES",
|
|
189
|
-
"urgent = data loss or security issue\n" \
|
|
190
|
-
"high = service down or billing error\n" \
|
|
191
|
-
"medium = feature broken but workaround exists\n" \
|
|
192
|
-
"low = question, feedback, or cosmetic issue"
|
|
193
|
-
|
|
194
|
-
example input: "Title: Can't export CSV\n\nBody: Export button returns 500 error since yesterday.",
|
|
195
|
-
output: '{"category":"technical","priority":"high","sentiment":"frustrated",' \
|
|
196
|
-
'"summary":"CSV export returns 500 error","suggested_action":"escalate to engineering"}'
|
|
197
|
-
|
|
198
|
-
example input: "Title: Dark mode request\n\nBody: Would love dark mode for late night work!",
|
|
199
|
-
output: '{"category":"feature_request","priority":"low","sentiment":"positive",' \
|
|
200
|
-
'"summary":"Requests dark mode feature","suggested_action":"add to feature backlog"}'
|
|
201
|
-
|
|
202
|
-
user "Title: {title}\n\nBody: {body}"
|
|
203
|
-
end
|
|
204
|
-
|
|
205
|
-
validate("category must be valid") do |o|
|
|
206
|
-
%w[billing technical feature_request account other].include?(o[:category])
|
|
207
|
-
end
|
|
208
|
-
|
|
209
|
-
validate("priority must be valid") do |o|
|
|
210
|
-
%w[low medium high urgent].include?(o[:priority])
|
|
211
|
-
end
|
|
212
|
-
|
|
213
|
-
validate("sentiment must be valid") do |o|
|
|
214
|
-
%w[positive negative neutral frustrated].include?(o[:sentiment])
|
|
215
|
-
end
|
|
216
|
-
|
|
217
|
-
validate("summary must be present") do |o|
|
|
218
|
-
!o[:summary].to_s.strip.empty?
|
|
219
|
-
end
|
|
220
|
-
|
|
221
|
-
validate("summary must be concise") do |o|
|
|
222
|
-
o[:summary].to_s.length <= 100
|
|
223
|
-
end
|
|
224
|
-
|
|
225
|
-
validate("suggested_action must be present") do |o|
|
|
226
|
-
!o[:suggested_action].to_s.strip.empty?
|
|
227
|
-
end
|
|
228
|
-
|
|
229
|
-
# 2-arity: cross-validate output against input
|
|
230
|
-
validate("urgent priority requires justification in body") do |output, input|
|
|
231
|
-
next true unless output[:priority] == "urgent"
|
|
232
|
-
|
|
233
|
-
body = input[:body].downcase
|
|
234
|
-
body.include?("data loss") || body.include?("security") ||
|
|
235
|
-
body.include?("breach") || body.include?("leak") || body.include?("deleted")
|
|
236
|
-
end
|
|
237
|
-
end
|
|
238
|
-
|
|
239
|
-
puts "\n--- Full power: AnalyzeTicket ---"
|
|
240
|
-
|
|
241
|
-
result = AnalyzeTicket.run(
|
|
242
|
-
{
|
|
243
|
-
title: "All my projects disappeared",
|
|
244
|
-
body: "I logged in this morning and all 47 projects are gone. This is a data loss emergency. " \
|
|
245
|
-
"I have a client demo in 2 hours.",
|
|
246
|
-
product: "ProjectHub Pro",
|
|
247
|
-
customer_tier: "enterprise"
|
|
248
|
-
},
|
|
249
|
-
context: { model: "gpt-4.1-mini", temperature: 0.0 }
|
|
250
|
-
)
|
|
251
|
-
|
|
252
|
-
puts "Status: #{result.status}"
|
|
253
|
-
puts "Category: #{result.parsed_output&.dig(:category)}"
|
|
254
|
-
puts "Priority: #{result.parsed_output&.dig(:priority)}"
|
|
255
|
-
puts "Sentiment:#{result.parsed_output&.dig(:sentiment)}"
|
|
256
|
-
puts "Summary: #{result.parsed_output&.dig(:summary)}"
|
|
257
|
-
puts "Action: #{result.parsed_output&.dig(:suggested_action)}"
|
|
258
|
-
puts "Latency: #{result.trace[:latency_ms]}ms"
|
|
259
|
-
puts "Tokens: #{result.trace[:usage]}"
|
|
260
|
-
|
|
261
|
-
puts "ERRORS: #{result.validation_errors}" if result.failed?
|
|
262
|
-
|
|
263
|
-
# =============================================================================
|
|
264
|
-
# STEP 10: Full power — Pipeline + output_schema + invariants + real LLM
|
|
265
|
-
#
|
|
266
|
-
# This combines everything: 3-step pipeline where each step has its own
|
|
267
|
-
# output_schema (provider-enforced), cross-validation invariants,
|
|
268
|
-
# and real LLM calls. If any step hallucinates, execution stops.
|
|
269
|
-
#
|
|
270
|
-
# Use case: meeting transcript → follow-up email
|
|
271
|
-
# Step 1 (listener): extract decisions + action items
|
|
272
|
-
# Step 2 (critic): flag vague owners/deadlines
|
|
273
|
-
# Step 3 (writer): generate send-ready follow-up email
|
|
274
|
-
# =============================================================================
|
|
275
|
-
|
|
276
|
-
class ExtractMeetingItems < RubyLLM::Contract::Step::Base
|
|
277
|
-
input_type String
|
|
278
|
-
|
|
279
|
-
output_schema do
|
|
280
|
-
array :decisions do
|
|
281
|
-
string :id
|
|
282
|
-
string :description
|
|
283
|
-
string :made_by
|
|
284
|
-
end
|
|
285
|
-
array :action_items do
|
|
286
|
-
string :id
|
|
287
|
-
string :task
|
|
288
|
-
string :owner
|
|
289
|
-
string :deadline
|
|
290
|
-
end
|
|
291
|
-
end
|
|
292
|
-
|
|
293
|
-
prompt do
|
|
294
|
-
system "Extract decisions and action items from a meeting transcript."
|
|
295
|
-
rule "Only include decisions explicitly stated, never infer."
|
|
296
|
-
rule "Assign sequential IDs: D1, D2... for decisions, A1, A2... for action items."
|
|
297
|
-
user "{input}"
|
|
298
|
-
end
|
|
299
|
-
end
|
|
300
|
-
|
|
301
|
-
class AnalyzeAmbiguities < RubyLLM::Contract::Step::Base
|
|
302
|
-
input_type Hash
|
|
303
|
-
|
|
304
|
-
output_schema do
|
|
305
|
-
array :decisions do
|
|
306
|
-
string :id
|
|
307
|
-
string :description
|
|
308
|
-
string :made_by
|
|
309
|
-
end
|
|
310
|
-
array :action_items do
|
|
311
|
-
string :id
|
|
312
|
-
string :task
|
|
313
|
-
string :owner
|
|
314
|
-
string :deadline
|
|
315
|
-
end
|
|
316
|
-
array :analyses do
|
|
317
|
-
string :action_item_id
|
|
318
|
-
string :status, enum: %w[clear ambiguous]
|
|
319
|
-
array :issues do
|
|
320
|
-
string :field, enum: %w[owner deadline scope]
|
|
321
|
-
string :problem
|
|
322
|
-
string :clarification_question
|
|
323
|
-
end
|
|
324
|
-
end
|
|
325
|
-
end
|
|
326
|
-
|
|
327
|
-
prompt do
|
|
328
|
-
system "Review action items for completeness. Flag vague owners, missing deadlines, unclear scope."
|
|
329
|
-
rule "Pass through the original decisions and action_items unchanged."
|
|
330
|
-
rule "Add an analyses array with one entry per action item."
|
|
331
|
-
user "Decisions: {decisions}\n\nAction items: {action_items}"
|
|
332
|
-
end
|
|
333
|
-
|
|
334
|
-
# Cross-validate: every action item from step 1 must be analyzed
|
|
335
|
-
validate("all action items analyzed") do |output, input|
|
|
336
|
-
output[:analyses].map { |a| a[:action_item_id] }.sort ==
|
|
337
|
-
input[:action_items].map { |a| a[:id] }.sort
|
|
338
|
-
end
|
|
339
|
-
end
|
|
340
|
-
|
|
341
|
-
class GenerateMeetingEmail < RubyLLM::Contract::Step::Base
|
|
342
|
-
input_type Hash
|
|
343
|
-
|
|
344
|
-
output_schema do
|
|
345
|
-
string :subject
|
|
346
|
-
string :body
|
|
347
|
-
end
|
|
348
|
-
|
|
349
|
-
prompt do
|
|
350
|
-
system "Write a professional follow-up email. List decisions, clear action items " \
|
|
351
|
-
"with owners and deadlines, and embed clarification questions for ambiguous items."
|
|
352
|
-
user "Decisions: {decisions}\nAction items: {action_items}\nAnalyses: {analyses}"
|
|
353
|
-
end
|
|
354
|
-
|
|
355
|
-
validate("subject must be concise") { |o| o[:subject].length <= 80 }
|
|
356
|
-
validate("body must not be empty") { |o| !o[:body].to_s.strip.empty? }
|
|
357
|
-
end
|
|
358
|
-
|
|
359
|
-
class MeetingFollowUpPipeline < RubyLLM::Contract::Pipeline::Base
|
|
360
|
-
step ExtractMeetingItems, as: :extract
|
|
361
|
-
step AnalyzeAmbiguities, as: :analyze
|
|
362
|
-
step GenerateMeetingEmail, as: :email
|
|
363
|
-
end
|
|
364
|
-
|
|
365
|
-
transcript = <<~TRANSCRIPT
|
|
366
|
-
Sarah: Let's go with the new pricing model starting Q3.
|
|
367
|
-
Tom: I'll update the billing system... at some point.
|
|
368
|
-
Sarah: Someone should notify the sales team about the changes.
|
|
369
|
-
Tom: Also, we need to migrate the legacy accounts. Maria, can you handle that?
|
|
370
|
-
Maria: Sure, I'll look into it.
|
|
371
|
-
TRANSCRIPT
|
|
372
|
-
|
|
373
|
-
puts "\n--- Full power: Pipeline + Schema + Invariants + Real LLM ---"
|
|
374
|
-
result = MeetingFollowUpPipeline.run(transcript,
|
|
375
|
-
context: { model: "gpt-4.1-mini", temperature: 0.0 })
|
|
376
|
-
|
|
377
|
-
puts "Pipeline status: #{result.status}"
|
|
378
|
-
puts "Steps run: #{result.step_results.length}"
|
|
379
|
-
|
|
380
|
-
if result.ok?
|
|
381
|
-
puts "\nExtracted: #{result.outputs_by_step[:extract][:decisions]&.length} decisions, " \
|
|
382
|
-
"#{result.outputs_by_step[:extract][:action_items]&.length} action items"
|
|
383
|
-
|
|
384
|
-
ambiguous = result.outputs_by_step[:analyze][:analyses]&.select { |a| a[:status] == "ambiguous" }
|
|
385
|
-
puts "Ambiguous: #{ambiguous&.length} items need clarification"
|
|
386
|
-
puts "Email subj: #{result.outputs_by_step[:email][:subject]}"
|
|
387
|
-
else
|
|
388
|
-
puts "FAILED at: #{result.failed_step}"
|
|
389
|
-
failed = result.step_results.last[:result]
|
|
390
|
-
puts "Errors: #{failed.validation_errors}"
|
|
391
|
-
end
|
|
392
|
-
|
|
393
|
-
# =============================================================================
|
|
394
|
-
# SUMMARY
|
|
395
|
-
#
|
|
396
|
-
# 1. Configure ruby_llm (API keys)
|
|
397
|
-
# 2. Set adapter: Adapters::RubyLLM.new
|
|
398
|
-
# 3. Define steps exactly as before (types, prompt, contract)
|
|
399
|
-
# 4. Run — real LLM call with full contract enforcement
|
|
400
|
-
# 5. Override model/temperature/max_tokens per call via context
|
|
401
|
-
# 6. Switch providers by changing the model name — everything else stays
|
|
402
|
-
# 7. Combine ALL features in a single step: system, rules, sections,
|
|
403
|
-
# examples, hash input, 1-arity + 2-arity invariants
|
|
404
|
-
# 8. Error handling — contract enforcement with real LLM responses
|
|
405
|
-
# 9. Full power single step — AnalyzeTicket with every feature
|
|
406
|
-
# 10. Full power pipeline — 3 steps, schemas, invariants, real LLM
|
|
407
|
-
#
|
|
408
|
-
# The step definition is always provider-agnostic.
|
|
409
|
-
# Swap adapters between Test (specs) and RubyLLM (production).
|
|
410
|
-
# =============================================================================
|