ruby_llm-contract 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +55 -0
  4. data/CHANGELOG.md +76 -0
  5. data/Gemfile +11 -0
  6. data/Gemfile.lock +176 -0
  7. data/LICENSE +21 -0
  8. data/README.md +154 -0
  9. data/Rakefile +8 -0
  10. data/examples/00_basics.rb +500 -0
  11. data/examples/01_classify_threads.rb +220 -0
  12. data/examples/02_generate_comment.rb +203 -0
  13. data/examples/03_target_audience.rb +201 -0
  14. data/examples/04_real_llm.rb +410 -0
  15. data/examples/05_output_schema.rb +258 -0
  16. data/examples/07_keyword_extraction.rb +239 -0
  17. data/examples/08_translation.rb +353 -0
  18. data/examples/09_eval_dataset.rb +287 -0
  19. data/examples/10_reddit_full_showcase.rb +363 -0
  20. data/examples/README.md +140 -0
  21. data/lib/ruby_llm/contract/adapters/base.rb +13 -0
  22. data/lib/ruby_llm/contract/adapters/response.rb +17 -0
  23. data/lib/ruby_llm/contract/adapters/ruby_llm.rb +94 -0
  24. data/lib/ruby_llm/contract/adapters/test.rb +44 -0
  25. data/lib/ruby_llm/contract/adapters.rb +6 -0
  26. data/lib/ruby_llm/contract/concerns/deep_symbolize.rb +17 -0
  27. data/lib/ruby_llm/contract/concerns/eval_host.rb +109 -0
  28. data/lib/ruby_llm/contract/concerns/trace_equality.rb +15 -0
  29. data/lib/ruby_llm/contract/concerns/usage_aggregator.rb +43 -0
  30. data/lib/ruby_llm/contract/configuration.rb +21 -0
  31. data/lib/ruby_llm/contract/contract/definition.rb +39 -0
  32. data/lib/ruby_llm/contract/contract/invariant.rb +23 -0
  33. data/lib/ruby_llm/contract/contract/parser.rb +143 -0
  34. data/lib/ruby_llm/contract/contract/schema_validator.rb +239 -0
  35. data/lib/ruby_llm/contract/contract/validator.rb +104 -0
  36. data/lib/ruby_llm/contract/contract.rb +7 -0
  37. data/lib/ruby_llm/contract/cost_calculator.rb +38 -0
  38. data/lib/ruby_llm/contract/dsl.rb +13 -0
  39. data/lib/ruby_llm/contract/errors.rb +19 -0
  40. data/lib/ruby_llm/contract/eval/case_result.rb +76 -0
  41. data/lib/ruby_llm/contract/eval/contract_detail_builder.rb +47 -0
  42. data/lib/ruby_llm/contract/eval/dataset.rb +53 -0
  43. data/lib/ruby_llm/contract/eval/eval_definition.rb +112 -0
  44. data/lib/ruby_llm/contract/eval/evaluation_result.rb +27 -0
  45. data/lib/ruby_llm/contract/eval/evaluator/exact.rb +20 -0
  46. data/lib/ruby_llm/contract/eval/evaluator/json_includes.rb +58 -0
  47. data/lib/ruby_llm/contract/eval/evaluator/proc_evaluator.rb +40 -0
  48. data/lib/ruby_llm/contract/eval/evaluator/regex.rb +27 -0
  49. data/lib/ruby_llm/contract/eval/model_comparison.rb +80 -0
  50. data/lib/ruby_llm/contract/eval/pipeline_result_adapter.rb +15 -0
  51. data/lib/ruby_llm/contract/eval/report.rb +115 -0
  52. data/lib/ruby_llm/contract/eval/runner.rb +162 -0
  53. data/lib/ruby_llm/contract/eval/trait_evaluator.rb +75 -0
  54. data/lib/ruby_llm/contract/eval.rb +16 -0
  55. data/lib/ruby_llm/contract/pipeline/base.rb +62 -0
  56. data/lib/ruby_llm/contract/pipeline/result.rb +131 -0
  57. data/lib/ruby_llm/contract/pipeline/runner.rb +139 -0
  58. data/lib/ruby_llm/contract/pipeline/trace.rb +72 -0
  59. data/lib/ruby_llm/contract/pipeline.rb +6 -0
  60. data/lib/ruby_llm/contract/prompt/ast.rb +38 -0
  61. data/lib/ruby_llm/contract/prompt/builder.rb +47 -0
  62. data/lib/ruby_llm/contract/prompt/node.rb +25 -0
  63. data/lib/ruby_llm/contract/prompt/nodes/example_node.rb +27 -0
  64. data/lib/ruby_llm/contract/prompt/nodes/rule_node.rb +15 -0
  65. data/lib/ruby_llm/contract/prompt/nodes/section_node.rb +26 -0
  66. data/lib/ruby_llm/contract/prompt/nodes/system_node.rb +15 -0
  67. data/lib/ruby_llm/contract/prompt/nodes/user_node.rb +15 -0
  68. data/lib/ruby_llm/contract/prompt/nodes.rb +7 -0
  69. data/lib/ruby_llm/contract/prompt/renderer.rb +76 -0
  70. data/lib/ruby_llm/contract/railtie.rb +20 -0
  71. data/lib/ruby_llm/contract/rake_task.rb +78 -0
  72. data/lib/ruby_llm/contract/rspec/pass_eval.rb +96 -0
  73. data/lib/ruby_llm/contract/rspec/satisfy_contract.rb +31 -0
  74. data/lib/ruby_llm/contract/rspec.rb +6 -0
  75. data/lib/ruby_llm/contract/step/base.rb +138 -0
  76. data/lib/ruby_llm/contract/step/dsl.rb +144 -0
  77. data/lib/ruby_llm/contract/step/limit_checker.rb +64 -0
  78. data/lib/ruby_llm/contract/step/result.rb +38 -0
  79. data/lib/ruby_llm/contract/step/retry_executor.rb +90 -0
  80. data/lib/ruby_llm/contract/step/retry_policy.rb +76 -0
  81. data/lib/ruby_llm/contract/step/runner.rb +126 -0
  82. data/lib/ruby_llm/contract/step/trace.rb +70 -0
  83. data/lib/ruby_llm/contract/step.rb +10 -0
  84. data/lib/ruby_llm/contract/token_estimator.rb +19 -0
  85. data/lib/ruby_llm/contract/types.rb +11 -0
  86. data/lib/ruby_llm/contract/version.rb +7 -0
  87. data/lib/ruby_llm/contract.rb +108 -0
  88. data/ruby_llm-contract.gemspec +33 -0
  89. metadata +172 -0
@@ -0,0 +1,410 @@
1
+ # frozen_string_literal: true
2
+
3
+ # =============================================================================
4
+ # EXAMPLE 4: Real LLM calls via ruby_llm
5
+ #
6
+ # All previous examples used Adapters::Test with canned responses.
7
+ # This example shows how to connect to a real LLM provider
8
+ # (OpenAI, Anthropic, Google, etc.) using Adapters::RubyLLM.
9
+ #
10
+ # REQUIREMENTS:
11
+ # gem install ruby_llm
12
+ # export OPENAI_API_KEY=sk-... # or any provider key
13
+ #
14
+ # RUN:
15
+ # ruby examples/04_real_llm.rb
16
+ # =============================================================================
17
+
18
+ require_relative "../lib/ruby_llm/contract"
19
+
20
+ # =============================================================================
21
+ # STEP 1: Configure — single block, API key auto-creates adapter
22
+ #
23
+ # Just set your API key. The adapter is created automatically.
24
+ # =============================================================================
25
+
26
+ RubyLLM::Contract.configure do |config|
27
+ config.openai_api_key = ENV.fetch("OPENAI_API_KEY", nil)
28
+ # config.anthropic_api_key = ENV.fetch("ANTHROPIC_API_KEY", nil)
29
+ config.default_model = "gpt-4.1-mini"
30
+ end
31
+
32
+ # =============================================================================
33
+ # STEP 3: Define a step — identical to what you'd write with Test adapter
34
+ #
35
+ # The step doesn't know or care which adapter runs it.
36
+ # Same types, same prompt, same contract.
37
+ # =============================================================================
38
+
39
# Classifies a customer message into one of four support intents plus a
# confidence score. Provider-agnostic: the identical definition runs against
# the Test adapter in specs or a real LLM via Adapters::RubyLLM.
class ClassifyIntent < RubyLLM::Contract::Step::Base
  input_type String

  # Structured output contract; with a real provider this schema can also be
  # enforced server-side (structured output) — presumably, per the adapter
  # docs; confirm against Adapters::RubyLLM.
  output_schema do
    string :intent, enum: %w[sales support billing other]
    number :confidence, minimum: 0.0, maximum: 1.0
  end

  # Few-shot prompt: system instruction, one formatting rule, two
  # input/output examples, then the user input interpolated via {input}.
  prompt do
    system "You are an intent classifier for a customer support system."
    rule "Return JSON only, no markdown."
    example input: "I want to upgrade my plan",
            output: '{"intent": "sales", "confidence": 0.95}'
    example input: "My invoice is wrong",
            output: '{"intent": "billing", "confidence": 0.9}'
    user "{input}"
  end
end
57
+
58
+ # =============================================================================
59
+ # STEP 4: Run it — real LLM call, full contract enforcement
60
+ #
61
+ # The adapter sends the prompt to the real model.
62
+ # The contract validates the response just like with Test adapter.
63
+ # You get real token usage in the trace.
64
+ # =============================================================================
65
+
66
+ puts "Calling LLM..."
67
+ result = ClassifyIntent.run("I can't log in to my account")
68
+
69
+ puts "Status: #{result.status}" # => :ok
70
+ puts "Output: #{result.parsed_output}" # => {intent: "support", confidence: 0.95}
71
+ puts "Model: #{result.trace[:model]}" # => "gpt-4.1-mini"
72
+ puts "Latency: #{result.trace[:latency_ms]}ms" # => 823ms (real network time)
73
+ puts "Tokens: #{result.trace[:usage]}" # => {input_tokens: 142, output_tokens: 18}
74
+
75
+ if result.ok?
76
+ puts "Intent: #{result.parsed_output[:intent]}"
77
+ else
78
+ puts "FAILED: #{result.validation_errors}"
79
+ end
80
+
81
+ # =============================================================================
82
+ # STEP 5: Override model per call — A/B test different models
83
+ #
84
+ # Use context to try different models without changing the step definition.
85
+ # =============================================================================
86
+
87
+ puts "\n--- Comparing models ---"
88
+
89
+ %w[gpt-4.1-mini gpt-4.1-nano].each do |model|
90
+ r = ClassifyIntent.run("I need a refund", context: { model: model })
91
+ puts "#{model}: #{r.parsed_output} (#{r.trace[:latency_ms]}ms, #{r.trace[:usage]})"
92
+ end
93
+
94
+ # =============================================================================
95
+ # STEP 6: Control generation params — temperature, max_tokens
96
+ #
97
+ # Options are forwarded to ruby_llm. Lower temperature = more deterministic.
98
+ # =============================================================================
99
+
100
+ puts "\n--- With temperature 0 ---"
101
+ result = ClassifyIntent.run(
102
+ "Do you have an enterprise plan?",
103
+ context: { model: "gpt-4.1-mini", temperature: 0.0, max_tokens: 50 }
104
+ )
105
+ puts "Output: #{result.parsed_output}"
106
+
107
+ # =============================================================================
108
+ # STEP 7: Same step, different provider — just change the model
109
+ #
110
+ # If you have an Anthropic key configured, you can switch with one line.
111
+ # The prompt, contract, and invariants are provider-agnostic.
112
+ # =============================================================================
113
+
114
+ # Uncomment if you have an Anthropic key:
115
+ # puts "\n--- Anthropic ---"
116
+ # result = ClassifyIntent.run(
117
+ # "I want to cancel my subscription",
118
+ # context: { model: "claude-sonnet-4-6" }
119
+ # )
120
+ # puts "Output: #{result.parsed_output}"
121
+
122
+ # =============================================================================
123
+ # STEP 8: Error handling — what happens when things go wrong
124
+ #
125
+ # Contract enforcement works the same with real LLM responses.
126
+ # If the model returns something invalid, you get a clear error.
127
+ # =============================================================================
128
+
129
# Minimal sentiment classifier used to demonstrate error handling (STEP 8):
# if the model's reply violates the enum below, the run fails with a clear
# status and validation_errors instead of raising.
class StrictClassifier < RubyLLM::Contract::Step::Base
  input_type String

  output_schema do
    string :sentiment, enum: %w[positive negative neutral]
  end

  prompt do
    system "Classify the sentiment."
    user "{input}"
  end
end
141
+
142
+ puts "\n--- Strict classifier ---"
143
+ result = StrictClassifier.run("This product is amazing!", context: { model: "gpt-4.1-mini" })
144
+
145
+ if result.ok?
146
+ puts "Passed: #{result.parsed_output}"
147
+ else
148
+ puts "Failed: #{result.status} — #{result.validation_errors}"
149
+ puts "Raw: #{result.raw_output}"
150
+ end
151
+
152
+ # =============================================================================
153
+ # STEP 9: Full power — every prompt feature combined with a real LLM
154
+ #
155
+ # This step uses EVERY feature from 00_basics.rb in a single definition:
156
+ # - system message (main instruction)
157
+ # - rules (individual requirements)
158
+ # - sections (labeled context blocks)
159
+ # - examples (few-shot input/output pairs)
160
+ # - Hash input (multi-field auto-interpolation)
161
+ # - 1-arity invariants (validate output alone)
162
+ # - 2-arity invariants (cross-validate output against input)
163
+ #
164
+ # All of it running against a real LLM.
165
+ # =============================================================================
166
+
167
# Exercises every prompt feature against a real LLM in one step: system
# message, rules, labeled sections, few-shot examples, Hash input with
# auto-interpolation, plus 1-arity and 2-arity invariants.
class AnalyzeTicket < RubyLLM::Contract::Step::Base
  input_type RubyLLM::Contract::Types::Hash.schema(
    title: RubyLLM::Contract::Types::String,
    body: RubyLLM::Contract::Types::String,
    product: RubyLLM::Contract::Types::String,
    customer_tier: RubyLLM::Contract::Types::String
  )
  output_type Hash

  prompt do
    system "You are a support ticket analyzer for a SaaS company."

    rule "Return JSON only, no markdown, no explanation."
    rule "Include all required fields: category, priority, sentiment, summary, suggested_action."
    rule "Categories: billing, technical, feature_request, account, other."
    rule "Priorities: low, medium, high, urgent."
    rule "Sentiments: positive, negative, neutral, frustrated."
    rule "Summary must be one sentence, max 100 characters."

    section "PRODUCT CONTEXT", "Product: {product}\nCustomer tier: {customer_tier}"

    section "PRIORITY RULES",
            "urgent = data loss or security issue\n" \
            "high = service down or billing error\n" \
            "medium = feature broken but workaround exists\n" \
            "low = question, feedback, or cosmetic issue"

    example input: "Title: Can't export CSV\n\nBody: Export button returns 500 error since yesterday.",
            output: '{"category":"technical","priority":"high","sentiment":"frustrated",' \
                    '"summary":"CSV export returns 500 error","suggested_action":"escalate to engineering"}'

    example input: "Title: Dark mode request\n\nBody: Would love dark mode for late night work!",
            output: '{"category":"feature_request","priority":"low","sentiment":"positive",' \
                    '"summary":"Requests dark mode feature","suggested_action":"add to feature backlog"}'

    user "Title: {title}\n\nBody: {body}"
  end

  # --- 1-arity invariants: validate the output alone ---

  validate("category must be valid") do |out|
    %w[billing technical feature_request account other].include?(out[:category])
  end

  validate("priority must be valid") do |out|
    %w[low medium high urgent].include?(out[:priority])
  end

  validate("sentiment must be valid") do |out|
    %w[positive negative neutral frustrated].include?(out[:sentiment])
  end

  validate("summary must be present") do |out|
    !out[:summary].to_s.strip.empty?
  end

  validate("summary must be concise") do |out|
    out[:summary].to_s.length <= 100
  end

  validate("suggested_action must be present") do |out|
    !out[:suggested_action].to_s.strip.empty?
  end

  # --- 2-arity invariant: cross-validate the output against the input ---
  # An "urgent" classification is only accepted when the ticket body
  # actually mentions one of the severe-problem keywords.
  validate("urgent priority requires justification in body") do |out, inp|
    next true unless out[:priority] == "urgent"

    body = inp[:body].downcase
    ["data loss", "security", "breach", "leak", "deleted"].any? { |keyword| body.include?(keyword) }
  end
end
238
+
239
+ puts "\n--- Full power: AnalyzeTicket ---"
240
+
241
+ result = AnalyzeTicket.run(
242
+ {
243
+ title: "All my projects disappeared",
244
+ body: "I logged in this morning and all 47 projects are gone. This is a data loss emergency. " \
245
+ "I have a client demo in 2 hours.",
246
+ product: "ProjectHub Pro",
247
+ customer_tier: "enterprise"
248
+ },
249
+ context: { model: "gpt-4.1-mini", temperature: 0.0 }
250
+ )
251
+
252
+ puts "Status: #{result.status}"
253
+ puts "Category: #{result.parsed_output&.dig(:category)}"
254
+ puts "Priority: #{result.parsed_output&.dig(:priority)}"
255
+ puts "Sentiment: #{result.parsed_output&.dig(:sentiment)}"
256
+ puts "Summary: #{result.parsed_output&.dig(:summary)}"
257
+ puts "Action: #{result.parsed_output&.dig(:suggested_action)}"
258
+ puts "Latency: #{result.trace[:latency_ms]}ms"
259
+ puts "Tokens: #{result.trace[:usage]}"
260
+
261
+ puts "ERRORS: #{result.validation_errors}" if result.failed?
262
+
263
+ # =============================================================================
264
+ # STEP 10: Full power — Pipeline + output_schema + invariants + real LLM
265
+ #
266
+ # This combines everything: 3-step pipeline where each step has its own
267
+ # output_schema (provider-enforced), cross-validation invariants,
268
+ # and real LLM calls. If any step hallucinates, execution stops.
269
+ #
270
+ # Use case: meeting transcript → follow-up email
271
+ # Step 1 (listener): extract decisions + action items
272
+ # Step 2 (critic): flag vague owners/deadlines
273
+ # Step 3 (writer): generate send-ready follow-up email
274
+ # =============================================================================
275
+
276
# Pipeline step 1 ("listener"): pulls explicitly stated decisions and action
# items out of a raw meeting transcript. The nested schema objects give
# downstream steps a uniform shape to rely on.
class ExtractMeetingItems < RubyLLM::Contract::Step::Base
  input_type String

  output_schema do
    array :decisions do
      string :id
      string :description
      string :made_by
    end
    array :action_items do
      string :id
      string :task
      string :owner
      string :deadline
    end
  end

  prompt do
    system "Extract decisions and action items from a meeting transcript."
    rule "Only include decisions explicitly stated, never infer."
    rule "Assign sequential IDs: D1, D2... for decisions, A1, A2... for action items."
    user "{input}"
  end
end
300
+
301
# Pipeline step 2 ("critic"): passes decisions and action_items through
# unchanged and adds an analyses array flagging vague owners, missing
# deadlines, and unclear scope.
class AnalyzeAmbiguities < RubyLLM::Contract::Step::Base
  input_type Hash

  output_schema do
    array :decisions do
      string :id
      string :description
      string :made_by
    end
    array :action_items do
      string :id
      string :task
      string :owner
      string :deadline
    end
    array :analyses do
      string :action_item_id
      string :status, enum: %w[clear ambiguous]
      array :issues do
        string :field, enum: %w[owner deadline scope]
        string :problem
        string :clarification_question
      end
    end
  end

  prompt do
    system "Review action items for completeness. Flag vague owners, missing deadlines, unclear scope."
    rule "Pass through the original decisions and action_items unchanged."
    rule "Add an analyses array with one entry per action item."
    user "Decisions: {decisions}\n\nAction items: {action_items}"
  end

  # Cross-validation: the model must analyze exactly the action items that
  # step 1 produced — no extras, none skipped (order-insensitive).
  validate("all action items analyzed") do |out, inp|
    analyzed_ids = out[:analyses].map { |entry| entry[:action_item_id] }
    expected_ids = inp[:action_items].map { |item| item[:id] }
    analyzed_ids.sort == expected_ids.sort
  end
end
340
+
341
# Pipeline step 3 ("writer"): turns the analyzed meeting data into a
# send-ready follow-up email (subject + body).
class GenerateMeetingEmail < RubyLLM::Contract::Step::Base
  input_type Hash

  output_schema do
    string :subject
    string :body
  end

  prompt do
    system "Write a professional follow-up email. List decisions, clear action items " \
           "with owners and deadlines, and embed clarification questions for ambiguous items."
    user "Decisions: {decisions}\nAction items: {action_items}\nAnalyses: {analyses}"
  end

  # Business-logic invariants on top of the structural schema above.
  validate("subject must be concise") do |out|
    out[:subject].length <= 80
  end

  validate("body must not be empty") do |out|
    !out[:body].to_s.strip.empty?
  end
end
358
+
359
# Three-step pipeline: extract -> analyze -> email. Each step's validated
# output feeds the next; a contract failure at any step stops execution
# (see the failed_step branch below).
class MeetingFollowUpPipeline < RubyLLM::Contract::Pipeline::Base
  step ExtractMeetingItems, as: :extract
  step AnalyzeAmbiguities, as: :analyze
  step GenerateMeetingEmail, as: :email
end
364
+
365
+ transcript = <<~TRANSCRIPT
366
+ Sarah: Let's go with the new pricing model starting Q3.
367
+ Tom: I'll update the billing system... at some point.
368
+ Sarah: Someone should notify the sales team about the changes.
369
+ Tom: Also, we need to migrate the legacy accounts. Maria, can you handle that?
370
+ Maria: Sure, I'll look into it.
371
+ TRANSCRIPT
372
+
373
+ puts "\n--- Full power: Pipeline + Schema + Invariants + Real LLM ---"
374
+ result = MeetingFollowUpPipeline.run(transcript,
375
+ context: { model: "gpt-4.1-mini", temperature: 0.0 })
376
+
377
+ puts "Pipeline status: #{result.status}"
378
+ puts "Steps run: #{result.step_results.length}"
379
+
380
+ if result.ok?
381
+ puts "\nExtracted: #{result.outputs_by_step[:extract][:decisions]&.length} decisions, " \
382
+ "#{result.outputs_by_step[:extract][:action_items]&.length} action items"
383
+
384
+ ambiguous = result.outputs_by_step[:analyze][:analyses]&.select { |a| a[:status] == "ambiguous" }
385
+ puts "Ambiguous: #{ambiguous&.length} items need clarification"
386
+ puts "Email subj: #{result.outputs_by_step[:email][:subject]}"
387
+ else
388
+ puts "FAILED at: #{result.failed_step}"
389
+ failed = result.step_results.last[:result]
390
+ puts "Errors: #{failed.validation_errors}"
391
+ end
392
+
393
+ # =============================================================================
394
+ # SUMMARY
395
+ #
396
+ # 1. Configure ruby_llm (API keys)
397
+ # 2. Adapter (Adapters::RubyLLM) is created automatically from the API key
398
+ # 3. Define steps exactly as before (types, prompt, contract)
399
+ # 4. Run — real LLM call with full contract enforcement
400
+ # 5. Override model/temperature/max_tokens per call via context
401
+ # 6. Switch providers by changing the model name — everything else stays
402
+ # 7. Combine ALL features in a single step: system, rules, sections,
403
+ # examples, hash input, 1-arity + 2-arity invariants
404
+ # 8. Error handling — contract enforcement with real LLM responses
405
+ # 9. Full power single step — AnalyzeTicket with every feature
406
+ # 10. Full power pipeline — 3 steps, schemas, invariants, real LLM
407
+ #
408
+ # The step definition is always provider-agnostic.
409
+ # Swap adapters between Test (specs) and RubyLLM (production).
410
+ # =============================================================================
@@ -0,0 +1,258 @@
1
+ # frozen_string_literal: true
2
+
3
+ # =============================================================================
4
+ # EXAMPLE 5: Declarative output schema (ruby_llm-schema)
5
+ #
6
+ # Replace manual invariants with a schema DSL.
7
+ # The schema is sent to the LLM provider for structured output enforcement.
8
+ #
9
+ # With Test adapter: schema defines expectations, parsing is auto-inferred.
10
+ # With RubyLLM adapter: schema is also enforced server-side by the provider.
11
+ # =============================================================================
12
+
13
+ require_relative "../lib/ruby_llm/contract"
14
+
15
+ # =============================================================================
16
+ # STEP 1: BEFORE — legacy approach with output_type + manual invariants
17
+ #
18
+ # Every enum, range, and required field is a separate invariant.
19
+ # Works, but verbose. This is what you'd write WITHOUT output_schema.
20
+ # =============================================================================
21
+
22
+ RubyLLM::Contract.configure do |c|
23
+ c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
24
+ response: '{"intent": "sales", "confidence": 0.95}'
25
+ )
26
+ end
27
+
28
# "Before" variant: output structure is enforced by hand — output_type Hash
# plus one invariant per constraint. Deliberately verbose; compare with the
# output_schema version defined next.
class ClassifyIntentBefore < RubyLLM::Contract::Step::Base
  input_type String
  output_type Hash

  prompt do
    system "Classify the user's intent."
    rule "Return JSON only."
    user "{input}"
  end

  validate("must include intent") do |out|
    !out[:intent].to_s.empty?
  end

  validate("intent must be allowed") do |out|
    %w[sales support billing].include?(out[:intent])
  end

  validate("confidence must be a number") do |out|
    out[:confidence].is_a?(Numeric)
  end

  validate("confidence in range") do |out|
    out[:confidence]&.between?(0.0, 1.0)
  end
end
43
+
44
+ result = ClassifyIntentBefore.run("I want to buy")
45
+ result.status # => :ok
46
+ result.parsed_output # => {intent: "sales", confidence: 0.95}
47
+
48
+ # =============================================================================
49
+ # STEP 2: AFTER — output_schema replaces structural invariants
50
+ #
51
+ # Same constraints, but declared as a schema.
52
+ # No `output_type`, no `parse :json`, no structural invariants.
53
+ # =============================================================================
54
+
55
# "After" variant: the schema below replaces output_type, JSON parsing, and
# all four structural invariants from the preceding class — same constraints,
# declared once.
class ClassifyIntentAfter < RubyLLM::Contract::Step::Base
  input_type String

  output_schema do
    string :intent, enum: %w[sales support billing]
    number :confidence, minimum: 0.0, maximum: 1.0
  end

  prompt do
    system "Classify the user's intent."
    rule "Return JSON only."
    user "{input}"
  end
end
69
+
70
+ result = ClassifyIntentAfter.run("I want to buy")
71
+ result.status # => :ok
72
+ result.parsed_output # => {intent: "sales", confidence: 0.95}
73
+
74
+ # =============================================================================
75
+ # STEP 3: Schema + invariants — best of both worlds
76
+ #
77
+ # Schema handles structure (types, enums, ranges).
78
+ # Invariants handle business logic (cross-validation, conditionals).
79
+ # =============================================================================
80
+
81
+ RubyLLM::Contract.configure do |c|
82
+ c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
83
+ response: '{"category": "account", "priority": "urgent", "summary": "Projects disappeared"}'
84
+ )
85
+ end
86
+
87
# Schema + invariants combined: the schema enforces structure (enums,
# required fields), while the invariant below covers business logic that
# needs both the output and the original input.
class AnalyzeTicket < RubyLLM::Contract::Step::Base
  input_type RubyLLM::Contract::Types::Hash.schema(
    title: RubyLLM::Contract::Types::String,
    body: RubyLLM::Contract::Types::String
  )

  output_schema do
    string :category, enum: %w[billing technical feature_request account other]
    string :priority, enum: %w[low medium high urgent]
    string :summary, description: "One-sentence summary"
  end

  prompt do
    system "Analyze support tickets."
    rule "Return JSON with category, priority, and summary."
    user "Title: {title}\n\nBody: {body}"
  end

  # 2-arity cross-validation: "urgent" is only accepted when the ticket
  # body actually mentions one of the severe-problem keywords.
  validate("urgent requires justification") do |out, inp|
    next true unless out[:priority] == "urgent"

    body = inp[:body].downcase
    ["data loss", "security", "deleted"].any? { |keyword| body.include?(keyword) }
  end
end
114
+
115
+ # Justified urgent:
116
+ result = AnalyzeTicket.run({
117
+ title: "Projects disappeared",
118
+ body: "All my projects are gone. This is a data loss emergency."
119
+ })
120
+ result.status # => :ok
121
+
122
+ # Unjustified urgent:
123
+ RubyLLM::Contract.configure do |c|
124
+ c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
125
+ response: '{"category": "technical", "priority": "urgent", "summary": "Page is slow"}'
126
+ )
127
+ end
128
+
129
+ result = AnalyzeTicket.run({
130
+ title: "Slow page",
131
+ body: "Dashboard takes 5 seconds to load."
132
+ })
133
+ result.status # => :validation_failed
134
+ result.validation_errors # => ["urgent requires justification"]
135
+
136
+ # =============================================================================
137
+ # STEP 4: Complex schema — nested objects, arrays, optional fields
138
+ # =============================================================================
139
+
140
+ RubyLLM::Contract.configure do |c|
141
+ c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
142
+ response: '{"locale": "en", "groups": [{"who": "Freelancers", "pain_points": ["invoicing", "time tracking"]}]}'
143
+ )
144
+ end
145
+
146
# Demonstrates a complex nested schema: a top-level string plus an array of
# objects, each containing its own string array, with min/max item
# constraints on both levels.
class ProfileAudience < RubyLLM::Contract::Step::Base
  input_type RubyLLM::Contract::Types::Hash.schema(
    product: RubyLLM::Contract::Types::String,
    market: RubyLLM::Contract::Types::String
  )

  output_schema do
    string :locale, description: "ISO 639-1 language code"
    array :groups, min_items: 1, max_items: 4 do
      string :who, description: "Audience segment name"
      array :pain_points, of: :string, min_items: 1
    end
  end

  prompt do
    system "Generate target audience profiles."
    user "Product: {product}, Market: {market}"
  end
end
165
+
166
+ result = ProfileAudience.run({ product: "InvoiceApp", market: "US freelancers" })
167
+ result.status # => :ok
168
+ result.parsed_output # => {locale: "en", groups: [{who: "Freelancers", pain_points: [...]}]}
169
+
170
+ # =============================================================================
171
+ # STEP 5: Schema is provider-agnostic
172
+ #
173
+ # With Test adapter: schema auto-infers JSON parsing, no provider enforcement.
174
+ # With RubyLLM adapter: schema is ALSO sent to provider (structured output).
175
+ # The step definition doesn't change.
176
+ # =============================================================================
177
+
178
+ # To use with a real LLM and get provider-side enforcement:
179
+ #
180
+ # RubyLLM.configure { |c| c.openai_api_key = ENV["OPENAI_API_KEY"] }
181
+ # adapter = RubyLLM::Contract::Adapters::RubyLLM.new
182
+ # result = ClassifyIntentAfter.run("I want to buy",
183
+ # context: { adapter: adapter, model: "gpt-4.1-mini" })
184
+ #
185
+ # The provider enforces the schema — the model MUST return valid JSON
186
+ # matching the schema. Parse errors become nearly impossible.
187
+
188
+ # =============================================================================
189
+ # STEP 6: Pipeline with schemas — each step has its own schema
190
+ #
191
+ # Pipeline + output_schema = fully typed, provider-enforced multi-step flow.
192
+ # Each step declares its output schema. Data threads automatically.
193
+ # =============================================================================
194
+
195
+ RubyLLM::Contract.configure do |c|
196
+ c.default_adapter = RubyLLM::Contract::Adapters::Test.new(
197
+ response: '{"category": "billing", "priority": "high", "summary": "Payment page broken"}'
198
+ )
199
+ end
200
+
201
# Pipeline step 1: triages a raw ticket into category/priority/summary,
# each constrained by the schema below.
class TriageTicket < RubyLLM::Contract::Step::Base
  input_type RubyLLM::Contract::Types::Hash.schema(title: RubyLLM::Contract::Types::String, body: RubyLLM::Contract::Types::String)

  output_schema do
    string :category, enum: %w[billing technical feature_request account other]
    string :priority, enum: %w[low medium high urgent]
    string :summary
  end

  prompt do
    system "Triage support ticket."
    user "Title: {title}\nBody: {body}"
  end
end
215
+
216
# Pipeline step 2: consumes the triaged fields (interpolated from the
# previous step's output hash) and suggests a routing action.
class SuggestAction < RubyLLM::Contract::Step::Base
  input_type Hash

  output_schema do
    string :action
    string :team, enum: %w[engineering support billing product]
    boolean :escalate
  end

  prompt do
    system "Suggest action for a triaged ticket."
    user "Category: {category}, Priority: {priority}, Summary: {summary}"
  end
end
230
+
231
# Two-step typed pipeline: TriageTicket's schema-validated output becomes
# SuggestAction's input automatically.
class TicketPipeline < RubyLLM::Contract::Pipeline::Base
  step TriageTicket, as: :triage
  step SuggestAction, as: :action
end
235
+
236
+ # Both steps share the test adapter, so they get the same canned response.
237
+ # With a real LLM, step 2 would get a different response based on step 1's output.
238
+ result = TicketPipeline.run(
239
+ { title: "Payment page broken", body: "Error 500 on checkout" }
240
+ )
241
+ result.ok? # => true
242
+ result.outputs_by_step[:triage] # => {category: "billing", priority: "high", summary: "..."}
243
+ result.outputs_by_step[:action] # => same canned response (test adapter)
244
+ result.step_results.length # => 2
245
+
246
+ # =============================================================================
247
+ # SUMMARY
248
+ #
249
+ # Step 1: BEFORE — output_type + parse :json + structural invariants
250
+ # Step 2: AFTER — output_schema replaces all of that
251
+ # Step 3: Schema + invariants — schema for structure, invariants for logic
252
+ # Step 4: Complex schemas — nested objects, arrays, constraints
253
+ # Step 5: Provider-agnostic — same schema works with Test and RubyLLM
254
+ # Step 6: Pipeline + schemas — fully typed multi-step composition
255
+ #
256
+ # output_schema is optional. Existing steps with output_type + invariants
257
+ # continue to work unchanged.
258
+ # =============================================================================