ruby_llm-contract 0.7.0 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +66 -0
- data/Gemfile.lock +2 -2
- data/README.md +51 -252
- data/examples/00_basics.rb +110 -428
- data/examples/01_fallback_showcase.rb +208 -0
- data/examples/02_real_llm_minimal.rb +45 -0
- data/examples/03_summarize_with_keywords.rb +128 -0
- data/examples/04_summarize_and_translate.rb +196 -0
- data/examples/05_eval_dataset.rb +144 -0
- data/examples/06_retry_variants.rb +147 -0
- data/examples/README.md +20 -128
- data/lib/ruby_llm/contract/eval/model_comparison.rb +4 -4
- data/lib/ruby_llm/contract/eval/retry_optimizer.rb +7 -3
- data/lib/ruby_llm/contract/step/base.rb +28 -12
- data/lib/ruby_llm/contract/version.rb +1 -1
- metadata +7 -10
- data/examples/01_classify_threads.rb +0 -220
- data/examples/02_generate_comment.rb +0 -203
- data/examples/03_target_audience.rb +0 -201
- data/examples/04_real_llm.rb +0 -410
- data/examples/05_output_schema.rb +0 -258
- data/examples/07_keyword_extraction.rb +0 -239
- data/examples/08_translation.rb +0 -353
- data/examples/09_eval_dataset.rb +0 -287
- data/examples/10_reddit_full_showcase.rb +0 -363
data/examples/00_basics.rb
CHANGED
|
@@ -1,500 +1,182 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
# =============================================================================
|
|
4
|
-
# EXAMPLE 0:
|
|
4
|
+
# EXAMPLE 0: SummarizeArticle — from plain prompt to full contract
|
|
5
5
|
#
|
|
6
|
-
#
|
|
7
|
-
# and
|
|
6
|
+
# One step, seven incremental layers. Each layer adds exactly one capability
|
|
7
|
+
# and shows the line of code that unlocks it. Start at Step 1, read top to
|
|
8
|
+
# bottom, stop at the layer that matches your project.
|
|
8
9
|
# =============================================================================
|
|
9
10
|
|
|
10
11
|
require_relative "../lib/ruby_llm/contract"
|
|
11
12
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
#
|
|
28
|
-
# Or with ruby_llm (one-liner, but still no validation):
|
|
29
|
-
#
|
|
30
|
-
# RubyLLM.chat.ask("Classify the sentiment: #{text}")
|
|
31
|
-
#
|
|
32
|
-
# PROBLEM: no validation, no types, no trace, no structure
|
|
33
|
-
# =============================================================================
|
|
34
|
-
|
|
35
|
-
# Option A: with output_schema (recommended — simplest)
|
|
36
|
-
class SimpleSentiment < RubyLLM::Contract::Step::Base
|
|
37
|
-
input_type String # plain Ruby class works!
|
|
38
|
-
|
|
39
|
-
output_schema do
|
|
40
|
-
string :sentiment
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
prompt do
|
|
44
|
-
user "Classify the sentiment of this text as positive, negative, or neutral. Return JSON.\n\n{input}"
|
|
45
|
-
end
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
result = SimpleSentiment.run("I love this product!")
|
|
49
|
-
result.status # => :ok
|
|
50
|
-
result.parsed_output # => {sentiment: "positive"}
|
|
51
|
-
|
|
52
|
-
# Option B: with output_type (plain Ruby class — JSON parsing is implicit for Hash)
|
|
53
|
-
class SimpleSentimentDryTypes < RubyLLM::Contract::Step::Base
|
|
54
|
-
input_type String
|
|
55
|
-
output_type Hash
|
|
13
|
+
ARTICLE = <<~ARTICLE
|
|
14
|
+
Ruby 3.4 ships with frozen string literals on by default, measurable YJIT
|
|
15
|
+
speedups on Rails workloads, and tightened Warning.warn category filtering.
|
|
16
|
+
Parser fixes and faster keyword argument handling land alongside.
|
|
17
|
+
ARTICLE
|
|
18
|
+
|
|
19
|
+
CANNED = {
|
|
20
|
+
tldr: "Ruby 3.4 brings frozen string literals by default, YJIT speedups, parser fixes.",
|
|
21
|
+
takeaways: [
|
|
22
|
+
"Frozen string literals are the default in Ruby 3.4",
|
|
23
|
+
"YJIT delivers measurable Rails speedups",
|
|
24
|
+
"Parser fixes and keyword argument handling improve"
|
|
25
|
+
],
|
|
26
|
+
tone: "analytical"
|
|
27
|
+
}.freeze
|
|
56
28
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
end
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
result = SimpleSentimentDryTypes.run("I love this product!")
|
|
63
|
-
result.status # => :ok
|
|
64
|
-
result.parsed_output # => {sentiment: "positive"}
|
|
65
|
-
|
|
66
|
-
# =============================================================================
|
|
67
|
-
# STEP 2: Add system message — separate instructions from data
|
|
68
|
-
#
|
|
69
|
-
# BEFORE:
|
|
70
|
-
# Everything in one string — instructions and data mixed together
|
|
71
|
-
#
|
|
72
|
-
# AFTER:
|
|
73
|
-
# system = instructions (constant)
|
|
74
|
-
# user = data (variable)
|
|
75
|
-
# =============================================================================
|
|
76
|
-
|
|
77
|
-
class SentimentWithSystem < RubyLLM::Contract::Step::Base
|
|
78
|
-
input_type String
|
|
79
|
-
|
|
80
|
-
output_schema do
|
|
81
|
-
string :sentiment
|
|
82
|
-
end
|
|
83
|
-
|
|
84
|
-
prompt do
|
|
85
|
-
system "Classify the sentiment of the user's text."
|
|
86
|
-
user "{input}"
|
|
87
|
-
end
|
|
29
|
+
RubyLLM::Contract.configure do |c|
|
|
30
|
+
c.default_adapter = RubyLLM::Contract::Adapters::Test.new(response: CANNED)
|
|
88
31
|
end
|
|
89
32
|
|
|
90
|
-
result = SentimentWithSystem.run("I love this product!")
|
|
91
|
-
result.status # => :ok
|
|
92
|
-
result.parsed_output # => {sentiment: "positive"}
|
|
93
|
-
|
|
94
33
|
# =============================================================================
|
|
95
|
-
# STEP
|
|
96
|
-
#
|
|
97
|
-
# Rules are individual requirements. One rule per concern.
|
|
98
|
-
# Much clearer than a single wall of text.
|
|
34
|
+
# STEP 1 — Minimal: prompt + output_schema
|
|
35
|
+
# The step enforces JSON shape. No business rules yet, no retry.
|
|
99
36
|
# =============================================================================
|
|
100
37
|
|
|
101
|
-
RubyLLM::Contract
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
end
|
|
38
|
+
class SummarizeArticleMinimal < RubyLLM::Contract::Step::Base
|
|
39
|
+
prompt <<~PROMPT
|
|
40
|
+
Summarize this article for a UI card. Return a short TL;DR,
|
|
41
|
+
3 to 5 key takeaways, and a tone label.
|
|
106
42
|
|
|
107
|
-
|
|
108
|
-
|
|
43
|
+
{input}
|
|
44
|
+
PROMPT
|
|
109
45
|
|
|
110
46
|
output_schema do
|
|
111
|
-
string :
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
prompt do
|
|
116
|
-
system "You are a sentiment classifier."
|
|
117
|
-
rule "Return JSON only."
|
|
118
|
-
rule "Use exactly one of: positive, negative, neutral."
|
|
119
|
-
rule "Include a confidence score from 0.0 to 1.0."
|
|
120
|
-
user "{input}"
|
|
47
|
+
string :tldr
|
|
48
|
+
array :takeaways, of: :string, min_items: 3, max_items: 5
|
|
49
|
+
string :tone, enum: %w[neutral positive negative analytical]
|
|
121
50
|
end
|
|
122
51
|
end
|
|
123
52
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
53
|
+
r = SummarizeArticleMinimal.run(ARTICLE)
|
|
54
|
+
r.status # => :ok
|
|
55
|
+
r.parsed_output # => {tldr: "...", takeaways: [...], tone: "analytical"}
|
|
127
56
|
|
|
128
57
|
# =============================================================================
|
|
129
|
-
# STEP
|
|
130
|
-
#
|
|
131
|
-
#
|
|
132
|
-
# that schema can't express: conditional rules, cross-field checks, etc.
|
|
58
|
+
# STEP 2 — Add a business rule (validate) that schema cannot express
|
|
59
|
+
# Schema says "takeaways is an array of 3–5 strings". Nothing there says
|
|
60
|
+
# "uniqueness" or "TL;DR fits the card". That is what validate blocks are for.
|
|
133
61
|
# =============================================================================
|
|
134
62
|
|
|
135
|
-
class
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
output_schema do
|
|
139
|
-
string :sentiment, enum: %w[positive negative neutral]
|
|
140
|
-
number :confidence, minimum: 0.0, maximum: 1.0
|
|
141
|
-
end
|
|
142
|
-
|
|
143
|
-
prompt do
|
|
144
|
-
system "You are a sentiment classifier."
|
|
145
|
-
rule "Return JSON only."
|
|
146
|
-
rule "Use exactly one of: positive, negative, neutral."
|
|
147
|
-
rule "Include a confidence score from 0.0 to 1.0."
|
|
148
|
-
user "{input}"
|
|
149
|
-
end
|
|
150
|
-
|
|
151
|
-
# Schema already enforces enum + range. Validate adds custom logic:
|
|
152
|
-
validate("high confidence required for extreme sentiments") do |o|
|
|
153
|
-
next true unless %w[positive negative].include?(o[:sentiment])
|
|
154
|
-
|
|
155
|
-
o[:confidence] >= 0.7
|
|
156
|
-
end
|
|
63
|
+
class SummarizeArticleValidated < SummarizeArticleMinimal
|
|
64
|
+
validate("TL;DR fits the card") { |o, _| o[:tldr].length <= 200 }
|
|
65
|
+
validate("takeaways are unique") { |o, _| o[:takeaways].uniq.size == o[:takeaways].size }
|
|
157
66
|
end
|
|
158
67
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
result.parsed_output # => {sentiment: "positive", confidence: 0.95}
|
|
163
|
-
|
|
164
|
-
# Model returns low confidence for extreme sentiment — invariant catches it:
|
|
165
|
-
RubyLLM::Contract.configure do |c|
|
|
166
|
-
c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
|
|
167
|
-
response: '{"sentiment": "positive", "confidence": 0.3}'
|
|
168
|
-
)
|
|
169
|
-
end
|
|
170
|
-
|
|
171
|
-
result = SentimentValidated.run("I love this product!")
|
|
172
|
-
result.status # => :validation_failed
|
|
173
|
-
result.validation_errors # => ["high confidence required for extreme sentiments"]
|
|
174
|
-
|
|
175
|
-
# Model returns non-JSON:
|
|
176
|
-
RubyLLM::Contract.configure do |c|
|
|
177
|
-
c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
|
|
178
|
-
response: "I think it's positive"
|
|
179
|
-
)
|
|
180
|
-
end
|
|
181
|
-
|
|
182
|
-
result = SentimentValidated.run("I love this product!")
|
|
183
|
-
result.status # => :parse_error
|
|
184
|
-
result.validation_errors # => ["Failed to parse JSON: ..."]
|
|
68
|
+
r = SummarizeArticleValidated.run(ARTICLE)
|
|
69
|
+
r.status # => :ok
|
|
70
|
+
r.validation_errors # => []
|
|
185
71
|
|
|
186
72
|
# =============================================================================
|
|
187
|
-
# STEP
|
|
188
|
-
#
|
|
189
|
-
#
|
|
190
|
-
# The model better understands the expected format.
|
|
73
|
+
# STEP 3 — Structured prompt (prompt AST: system, rule, section, user)
|
|
74
|
+
# Replaces a heredoc. Individual nodes are reorderable, diffable, and
|
|
75
|
+
# inspectable — useful when the prompt grows beyond a few lines.
|
|
191
76
|
# =============================================================================
|
|
192
77
|
|
|
193
|
-
RubyLLM::Contract
|
|
194
|
-
c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
|
|
195
|
-
response: '{"sentiment": "positive", "confidence": 0.92}'
|
|
196
|
-
)
|
|
197
|
-
end
|
|
198
|
-
|
|
199
|
-
class SentimentWithExample < RubyLLM::Contract::Step::Base
|
|
200
|
-
input_type String
|
|
201
|
-
|
|
202
|
-
output_schema do
|
|
203
|
-
string :sentiment, enum: %w[positive negative neutral]
|
|
204
|
-
number :confidence, minimum: 0.0, maximum: 1.0
|
|
205
|
-
end
|
|
206
|
-
|
|
78
|
+
class SummarizeArticleStructured < RubyLLM::Contract::Step::Base
|
|
207
79
|
prompt do
|
|
208
|
-
system "You
|
|
209
|
-
rule
|
|
210
|
-
rule
|
|
211
|
-
|
|
212
|
-
example input: "This is terrible", output: '{"sentiment": "negative", "confidence": 0.9}'
|
|
213
|
-
example input: "It works fine I guess", output: '{"sentiment": "neutral", "confidence": 0.6}'
|
|
214
|
-
user "{input}"
|
|
80
|
+
system "You summarize articles for a UI card."
|
|
81
|
+
rule "Return valid JSON only."
|
|
82
|
+
rule "Keep the TL;DR under 200 characters."
|
|
83
|
+
user "{input}"
|
|
215
84
|
end
|
|
216
|
-
end
|
|
217
|
-
|
|
218
|
-
result = SentimentWithExample.run("I love this product!")
|
|
219
|
-
result.status # => :ok
|
|
220
|
-
result.parsed_output # => {sentiment: "positive", confidence: 0.92}
|
|
221
|
-
|
|
222
|
-
# =============================================================================
|
|
223
|
-
# STEP 6: Sections — replace heredoc string with structured AST
|
|
224
|
-
#
|
|
225
|
-
# BEFORE (typical heredoc prompt — one big string):
|
|
226
|
-
#
|
|
227
|
-
# prompt = <<~PROMPT # AFTER:
|
|
228
|
-
# You are a sentiment classifier for customer support. # system "You are a sentiment classifier for customer support."
|
|
229
|
-
# Return JSON with sentiment, confidence, and reason. # rule "Return JSON with sentiment, confidence, and reason."
|
|
230
|
-
# #
|
|
231
|
-
# [CONTEXT] # section "CONTEXT",
|
|
232
|
-
# We sell software for freelancers. # "We sell software for freelancers."
|
|
233
|
-
# #
|
|
234
|
-
# [SCORING GUIDE] # section "SCORING GUIDE",
|
|
235
|
-
# negative = complaint or frustration # "negative = complaint or frustration\n
|
|
236
|
-
# positive = praise or thanks # positive = praise or thanks\n
|
|
237
|
-
# neutral = question or factual statement # neutral = question or factual statement"
|
|
238
|
-
# #
|
|
239
|
-
# Classify this: #{text} # user "Classify this: {input}"
|
|
240
|
-
# PROMPT #
|
|
241
|
-
#
|
|
242
|
-
# PROBLEM: one big string — can't reorder, diff, or reuse individual sections
|
|
243
|
-
# AFTER: each part is a separate node in the prompt AST
|
|
244
|
-
# =============================================================================
|
|
245
|
-
|
|
246
|
-
RubyLLM::Contract.configure do |c|
|
|
247
|
-
c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
|
|
248
|
-
response: '{"sentiment": "negative", "confidence": 0.85, "reason": "product complaint"}'
|
|
249
|
-
)
|
|
250
|
-
end
|
|
251
|
-
|
|
252
|
-
class SentimentWithSections < RubyLLM::Contract::Step::Base
|
|
253
|
-
input_type String
|
|
254
85
|
|
|
255
86
|
output_schema do
|
|
256
|
-
string :
|
|
257
|
-
|
|
258
|
-
string :
|
|
259
|
-
end
|
|
260
|
-
|
|
261
|
-
prompt do
|
|
262
|
-
system "You are a sentiment classifier for customer support."
|
|
263
|
-
rule "Return JSON with sentiment, confidence, and reason."
|
|
264
|
-
|
|
265
|
-
section "CONTEXT", "We sell software for freelancers."
|
|
266
|
-
section "SCORING GUIDE",
|
|
267
|
-
"negative = complaint or frustration\npositive = praise or thanks\nneutral = question or factual statement"
|
|
268
|
-
|
|
269
|
-
user "Classify this: {input}"
|
|
87
|
+
string :tldr
|
|
88
|
+
array :takeaways, of: :string, min_items: 3, max_items: 5
|
|
89
|
+
string :tone, enum: %w[neutral positive negative analytical]
|
|
270
90
|
end
|
|
271
91
|
end
|
|
272
92
|
|
|
273
|
-
result = SentimentWithSections.run("Your billing page is broken again!")
|
|
274
|
-
result.status # => :ok
|
|
275
|
-
result.parsed_output # => {sentiment: "negative", confidence: 0.85, reason: "product complaint"}
|
|
276
|
-
|
|
277
93
|
# =============================================================================
|
|
278
|
-
# STEP
|
|
279
|
-
#
|
|
280
|
-
#
|
|
281
|
-
# {title} resolves to input[:title], {language} to input[:language], etc.
|
|
282
|
-
# No manual string building needed.
|
|
94
|
+
# STEP 4 — Hash input with variable interpolation
|
|
95
|
+
# When you need more than raw text (audience, language, tenant), take a Hash
|
|
96
|
+
# and reference its keys directly in the prompt.
|
|
283
97
|
# =============================================================================
|
|
284
98
|
|
|
285
|
-
RubyLLM::Contract
|
|
286
|
-
c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
|
|
287
|
-
response: '{"category": "billing", "priority": "high"}'
|
|
288
|
-
)
|
|
289
|
-
end
|
|
290
|
-
|
|
291
|
-
class ClassifyTicket < RubyLLM::Contract::Step::Base
|
|
99
|
+
class SummarizeArticleMultiField < RubyLLM::Contract::Step::Base
|
|
292
100
|
input_type RubyLLM::Contract::Types::Hash.schema(
|
|
293
|
-
|
|
294
|
-
|
|
101
|
+
article: RubyLLM::Contract::Types::String,
|
|
102
|
+
audience: RubyLLM::Contract::Types::String,
|
|
295
103
|
language: RubyLLM::Contract::Types::String
|
|
296
104
|
)
|
|
297
105
|
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
106
|
+
prompt do
|
|
107
|
+
system "You summarize articles for a UI card."
|
|
108
|
+
rule "Write the TL;DR and takeaways in {language}."
|
|
109
|
+
section "AUDIENCE", "{audience}"
|
|
110
|
+
user "{article}"
|
|
301
111
|
end
|
|
302
112
|
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
user "Title: {title}\n\nBody: {body}"
|
|
113
|
+
output_schema do
|
|
114
|
+
string :tldr
|
|
115
|
+
array :takeaways, of: :string, min_items: 3, max_items: 5
|
|
116
|
+
string :tone, enum: %w[neutral positive negative analytical]
|
|
308
117
|
end
|
|
309
118
|
end
|
|
310
119
|
|
|
311
|
-
result = ClassifyTicket.run(
|
|
312
|
-
{ title: "Can't update credit card", body: "Payment page gives error 500", language: "en" }
|
|
313
|
-
)
|
|
314
|
-
result.status # => :ok
|
|
315
|
-
result.parsed_output # => {category: "billing", priority: "high"}
|
|
316
|
-
|
|
317
120
|
# =============================================================================
|
|
318
|
-
# STEP
|
|
319
|
-
#
|
|
320
|
-
#
|
|
321
|
-
# A 2-arity invariant receives both |output, input| so you can cross-validate.
|
|
121
|
+
# STEP 5 — 2-arity validate: check the output against the input
|
|
122
|
+
# Catches "lazy" models that echo the article verbatim into the TL;DR.
|
|
123
|
+
# The block receives |output, input| — pass the input-side check too.
|
|
322
124
|
# =============================================================================
|
|
323
125
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
)
|
|
328
|
-
end
|
|
329
|
-
|
|
330
|
-
class Translate < RubyLLM::Contract::Step::Base
|
|
331
|
-
input_type RubyLLM::Contract::Types::Hash.schema(
|
|
332
|
-
text: RubyLLM::Contract::Types::String,
|
|
333
|
-
target_lang: RubyLLM::Contract::Types::String
|
|
334
|
-
)
|
|
335
|
-
|
|
336
|
-
output_schema do
|
|
337
|
-
string :translation, min_length: 1
|
|
338
|
-
string :source_lang
|
|
339
|
-
string :target_lang
|
|
340
|
-
end
|
|
341
|
-
|
|
342
|
-
prompt do
|
|
343
|
-
system "Translate the text to the target language."
|
|
344
|
-
rule "Return JSON with translation, source_lang, and target_lang."
|
|
345
|
-
user "Translate to {target_lang}: {text}"
|
|
346
|
-
end
|
|
347
|
-
|
|
348
|
-
# Schema handles: translation non-empty, all fields present
|
|
349
|
-
# 2-arity validate: cross-validate output against input
|
|
350
|
-
validate("target_lang must match requested language") do |output, input|
|
|
351
|
-
output[:target_lang] == input[:target_lang]
|
|
126
|
+
class SummarizeArticleFaithful < SummarizeArticleValidated
|
|
127
|
+
validate("TL;DR is shorter than the article") do |output, input|
|
|
128
|
+
output[:tldr].length < input.length / 2
|
|
352
129
|
end
|
|
353
130
|
end
|
|
354
131
|
|
|
355
|
-
result = Translate.run({ text: "Hello world", target_lang: "fr" })
|
|
356
|
-
result.status # => :ok
|
|
357
|
-
result.parsed_output # => {translation: "Bonjour le monde", source_lang: "en", target_lang: "fr"}
|
|
358
|
-
|
|
359
|
-
# What if model returns wrong target language?
|
|
360
|
-
RubyLLM::Contract.configure do |c|
|
|
361
|
-
c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
|
|
362
|
-
response: '{"translation": "Hola mundo", "source_lang": "en", "target_lang": "es"}'
|
|
363
|
-
)
|
|
364
|
-
end
|
|
365
|
-
|
|
366
|
-
result = Translate.run({ text: "Hello world", target_lang: "fr" })
|
|
367
|
-
result.status # => :validation_failed
|
|
368
|
-
result.validation_errors # => ["target_lang must match requested language"]
|
|
369
|
-
|
|
370
132
|
# =============================================================================
|
|
371
|
-
# STEP
|
|
372
|
-
#
|
|
373
|
-
#
|
|
374
|
-
# Useful for: testing, switching models, A/B testing prompts.
|
|
133
|
+
# STEP 6 — Retry with model fallback
|
|
134
|
+
# Start on the cheapest model. If validate or schema rejects the output,
|
|
135
|
+
# the gem automatically retries on the next model in the list.
|
|
375
136
|
# =============================================================================
|
|
376
137
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
c.default_model = "gpt-4.1-mini"
|
|
138
|
+
class SummarizeArticleWithRetry < SummarizeArticleValidated
|
|
139
|
+
retry_policy models: %w[gpt-5-nano gpt-5-mini gpt-5]
|
|
380
140
|
end
|
|
381
141
|
|
|
382
|
-
# Uses global defaults:
|
|
383
|
-
result = SimpleSentiment.run("I love this product!")
|
|
384
|
-
result.status # => :ok
|
|
385
|
-
result.trace[:model] # => "gpt-4.1-mini"
|
|
386
|
-
|
|
387
|
-
# Override adapter and model for this specific call:
|
|
388
|
-
other_adapter = RubyLLM::Contract::Adapters::Test.new(response: '{"sentiment": "neutral"}')
|
|
389
|
-
result = SimpleSentiment.run("I love this product!", context: { adapter: other_adapter, model: "gpt-5" })
|
|
390
|
-
result.status # => :ok
|
|
391
|
-
result.parsed_output # => {sentiment: "neutral"}
|
|
392
|
-
result.trace[:model] # => "gpt-5"
|
|
393
|
-
|
|
394
142
|
# =============================================================================
|
|
395
|
-
# STEP
|
|
396
|
-
#
|
|
397
|
-
#
|
|
143
|
+
# STEP 7 — Inspect the Result: status, parsed_output, trace, per-attempt
|
|
144
|
+
# Every run returns a value object with everything you need to log, debug,
|
|
145
|
+
# or surface in an admin UI.
|
|
398
146
|
# =============================================================================
|
|
399
147
|
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
result.validation_errors # => []
|
|
409
|
-
result.trace[:model] # => "gpt-4.1-mini"
|
|
410
|
-
result.trace[:latency_ms] # => 0 (instant with test adapter)
|
|
411
|
-
result.trace[:messages] # => [{role: :system, content: "..."}, {role: :user, content: "..."}]
|
|
412
|
-
|
|
413
|
-
# On failure, you still get everything for debugging:
|
|
414
|
-
bad_adapter = RubyLLM::Contract::Adapters::Test.new(response: '{"sentiment": "positive", "confidence": 0.1}')
|
|
415
|
-
result = SentimentValidated.run("I love this product!", context: { adapter: bad_adapter })
|
|
416
|
-
|
|
417
|
-
result.status # => :validation_failed
|
|
418
|
-
result.ok? # => false
|
|
419
|
-
result.failed? # => true
|
|
420
|
-
result.raw_output # => '{"sentiment": "positive", "confidence": 0.1}'
|
|
421
|
-
result.parsed_output # => {sentiment: "positive", confidence: 0.1}
|
|
422
|
-
result.validation_errors # => ["high confidence required for extreme sentiments"]
|
|
148
|
+
r = SummarizeArticleWithRetry.run(ARTICLE)
|
|
149
|
+
r.status # => :ok
|
|
150
|
+
r.ok? # => true
|
|
151
|
+
r.parsed_output # => {tldr: "...", takeaways: [...], tone: "analytical"}
|
|
152
|
+
r.validation_errors # => []
|
|
153
|
+
r.trace[:model] # => "gpt-5-nano" (first model that passed)
|
|
154
|
+
r.trace[:attempts] # => [{attempt: 1, model: "gpt-5-nano", status: :ok, ...}]
|
|
155
|
+
r.trace[:cost] # => sum of per-attempt costs
|
|
423
156
|
|
|
424
157
|
# =============================================================================
|
|
425
|
-
# STEP
|
|
158
|
+
# STEP 8 — Swap the Test adapter for a real LLM
|
|
159
|
+
# The step itself does not change. Point ruby_llm at your provider and
|
|
160
|
+
# pass Adapters::RubyLLM.new in context.
|
|
426
161
|
#
|
|
427
|
-
#
|
|
428
|
-
# Output of step N automatically becomes input to step N+1.
|
|
429
|
-
# If any step fails, execution halts immediately.
|
|
162
|
+
# See examples/02_real_llm_minimal.rb for a runnable ~30-line version.
|
|
430
163
|
# =============================================================================
|
|
431
164
|
|
|
432
|
-
#
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
end
|
|
440
|
-
|
|
441
|
-
prompt do
|
|
442
|
-
system "Classify sentiment and return the original text."
|
|
443
|
-
user "{input}"
|
|
444
|
-
end
|
|
445
|
-
end
|
|
446
|
-
|
|
447
|
-
# Step B: generate a response based on sentiment
|
|
448
|
-
class PipelineRespond < RubyLLM::Contract::Step::Base
|
|
449
|
-
input_type Hash
|
|
450
|
-
|
|
451
|
-
output_schema do
|
|
452
|
-
string :response
|
|
453
|
-
string :tone
|
|
454
|
-
end
|
|
455
|
-
|
|
456
|
-
prompt do
|
|
457
|
-
system "Generate a customer support response matching the sentiment."
|
|
458
|
-
user "Text: {text}\nSentiment: {sentiment}"
|
|
459
|
-
end
|
|
460
|
-
end
|
|
461
|
-
|
|
462
|
-
# Pipeline: sentiment → respond
|
|
463
|
-
class SupportPipeline < RubyLLM::Contract::Pipeline::Base
|
|
464
|
-
step PipelineSentiment, as: :classify
|
|
465
|
-
step PipelineRespond, as: :respond
|
|
466
|
-
end
|
|
467
|
-
|
|
468
|
-
# Happy path:
|
|
469
|
-
RubyLLM::Contract.configure do |c|
|
|
470
|
-
c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
|
|
471
|
-
response: '{"text": "I love this product!", "sentiment": "positive"}'
|
|
472
|
-
)
|
|
473
|
-
end
|
|
474
|
-
|
|
475
|
-
# NOTE: with Test adapter, both steps get the same canned response.
|
|
476
|
-
# With a real LLM, each step would get a different response.
|
|
477
|
-
result = SupportPipeline.run("I love this product!")
|
|
478
|
-
result.ok? # => true
|
|
479
|
-
result.outputs_by_step[:classify] # => {text: "I love this product!", sentiment: "positive"}
|
|
480
|
-
result.outputs_by_step[:respond] # => {text: "I love this product!", sentiment: "positive"}
|
|
481
|
-
result.step_results.length # => 2
|
|
165
|
+
# RubyLLM.configure { |c| c.openai_api_key = ENV.fetch("OPENAI_API_KEY") }
|
|
166
|
+
# adapter = RubyLLM::Contract::Adapters::RubyLLM.new
|
|
167
|
+
# result = SummarizeArticleWithRetry.run(article_text, context: { adapter: adapter })
|
|
168
|
+
#
|
|
169
|
+
# Switch provider per call — ruby_llm resolves the provider from the model name:
|
|
170
|
+
# SummarizeArticleWithRetry.run(article_text, context: { adapter: adapter, model: "claude-sonnet-4-6" })
|
|
171
|
+
# SummarizeArticleWithRetry.run(article_text, context: { adapter: adapter, model: "gemma3:4b" }) # local Ollama
|
|
482
172
|
|
|
483
173
|
# =============================================================================
|
|
484
|
-
#
|
|
485
|
-
#
|
|
486
|
-
# Step 1: user "{input}" — plain string, nothing else
|
|
487
|
-
# Step 2: system + user — separate instructions from data
|
|
488
|
-
# Step 3: + output_schema — declarative output structure
|
|
489
|
-
# Step 4: + invariants — custom business logic on top
|
|
490
|
-
# Step 5: + examples — few-shot
|
|
491
|
-
# Step 6: + sections — labeled context blocks
|
|
492
|
-
# Step 7: Hash input — multiple fields, auto-interpolation
|
|
493
|
-
# Step 8: 2-arity invariants — cross-validate output vs input
|
|
494
|
-
# Step 9: context override — per-run adapter and model
|
|
495
|
-
# Step 10: StepResult — full status, output, errors, trace
|
|
496
|
-
# Step 11: Pipeline — chain steps with fail-fast
|
|
174
|
+
# Where to go next
|
|
497
175
|
#
|
|
498
|
-
#
|
|
499
|
-
#
|
|
176
|
+
# 01_fallback_showcase.rb — see the retry loop run in 30 seconds
|
|
177
|
+
# 02_real_llm_minimal.rb — swap Test adapter for Adapters::RubyLLM
|
|
178
|
+
# 03_summarize_with_keywords.rb — growing prompt: add a keywords field
|
|
179
|
+
# 04_summarize_and_translate.rb — pipeline: summarize → translate → review
|
|
180
|
+
# 05_eval_dataset.rb — define_eval, add_case, regression detection
|
|
181
|
+
# 06_retry_variants.rb — attempts: 3, reasoning_effort, cross-provider
|
|
500
182
|
# =============================================================================
|