ruby_llm-contract 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +55 -0
  4. data/CHANGELOG.md +76 -0
  5. data/Gemfile +11 -0
  6. data/Gemfile.lock +176 -0
  7. data/LICENSE +21 -0
  8. data/README.md +154 -0
  9. data/Rakefile +8 -0
  10. data/examples/00_basics.rb +500 -0
  11. data/examples/01_classify_threads.rb +220 -0
  12. data/examples/02_generate_comment.rb +203 -0
  13. data/examples/03_target_audience.rb +201 -0
  14. data/examples/04_real_llm.rb +410 -0
  15. data/examples/05_output_schema.rb +258 -0
  16. data/examples/07_keyword_extraction.rb +239 -0
  17. data/examples/08_translation.rb +353 -0
  18. data/examples/09_eval_dataset.rb +287 -0
  19. data/examples/10_reddit_full_showcase.rb +363 -0
  20. data/examples/README.md +140 -0
  21. data/lib/ruby_llm/contract/adapters/base.rb +13 -0
  22. data/lib/ruby_llm/contract/adapters/response.rb +17 -0
  23. data/lib/ruby_llm/contract/adapters/ruby_llm.rb +94 -0
  24. data/lib/ruby_llm/contract/adapters/test.rb +44 -0
  25. data/lib/ruby_llm/contract/adapters.rb +6 -0
  26. data/lib/ruby_llm/contract/concerns/deep_symbolize.rb +17 -0
  27. data/lib/ruby_llm/contract/concerns/eval_host.rb +109 -0
  28. data/lib/ruby_llm/contract/concerns/trace_equality.rb +15 -0
  29. data/lib/ruby_llm/contract/concerns/usage_aggregator.rb +43 -0
  30. data/lib/ruby_llm/contract/configuration.rb +21 -0
  31. data/lib/ruby_llm/contract/contract/definition.rb +39 -0
  32. data/lib/ruby_llm/contract/contract/invariant.rb +23 -0
  33. data/lib/ruby_llm/contract/contract/parser.rb +143 -0
  34. data/lib/ruby_llm/contract/contract/schema_validator.rb +239 -0
  35. data/lib/ruby_llm/contract/contract/validator.rb +104 -0
  36. data/lib/ruby_llm/contract/contract.rb +7 -0
  37. data/lib/ruby_llm/contract/cost_calculator.rb +38 -0
  38. data/lib/ruby_llm/contract/dsl.rb +13 -0
  39. data/lib/ruby_llm/contract/errors.rb +19 -0
  40. data/lib/ruby_llm/contract/eval/case_result.rb +76 -0
  41. data/lib/ruby_llm/contract/eval/contract_detail_builder.rb +47 -0
  42. data/lib/ruby_llm/contract/eval/dataset.rb +53 -0
  43. data/lib/ruby_llm/contract/eval/eval_definition.rb +112 -0
  44. data/lib/ruby_llm/contract/eval/evaluation_result.rb +27 -0
  45. data/lib/ruby_llm/contract/eval/evaluator/exact.rb +20 -0
  46. data/lib/ruby_llm/contract/eval/evaluator/json_includes.rb +58 -0
  47. data/lib/ruby_llm/contract/eval/evaluator/proc_evaluator.rb +40 -0
  48. data/lib/ruby_llm/contract/eval/evaluator/regex.rb +27 -0
  49. data/lib/ruby_llm/contract/eval/model_comparison.rb +80 -0
  50. data/lib/ruby_llm/contract/eval/pipeline_result_adapter.rb +15 -0
  51. data/lib/ruby_llm/contract/eval/report.rb +115 -0
  52. data/lib/ruby_llm/contract/eval/runner.rb +162 -0
  53. data/lib/ruby_llm/contract/eval/trait_evaluator.rb +75 -0
  54. data/lib/ruby_llm/contract/eval.rb +16 -0
  55. data/lib/ruby_llm/contract/pipeline/base.rb +62 -0
  56. data/lib/ruby_llm/contract/pipeline/result.rb +131 -0
  57. data/lib/ruby_llm/contract/pipeline/runner.rb +139 -0
  58. data/lib/ruby_llm/contract/pipeline/trace.rb +72 -0
  59. data/lib/ruby_llm/contract/pipeline.rb +6 -0
  60. data/lib/ruby_llm/contract/prompt/ast.rb +38 -0
  61. data/lib/ruby_llm/contract/prompt/builder.rb +47 -0
  62. data/lib/ruby_llm/contract/prompt/node.rb +25 -0
  63. data/lib/ruby_llm/contract/prompt/nodes/example_node.rb +27 -0
  64. data/lib/ruby_llm/contract/prompt/nodes/rule_node.rb +15 -0
  65. data/lib/ruby_llm/contract/prompt/nodes/section_node.rb +26 -0
  66. data/lib/ruby_llm/contract/prompt/nodes/system_node.rb +15 -0
  67. data/lib/ruby_llm/contract/prompt/nodes/user_node.rb +15 -0
  68. data/lib/ruby_llm/contract/prompt/nodes.rb +7 -0
  69. data/lib/ruby_llm/contract/prompt/renderer.rb +76 -0
  70. data/lib/ruby_llm/contract/railtie.rb +20 -0
  71. data/lib/ruby_llm/contract/rake_task.rb +78 -0
  72. data/lib/ruby_llm/contract/rspec/pass_eval.rb +96 -0
  73. data/lib/ruby_llm/contract/rspec/satisfy_contract.rb +31 -0
  74. data/lib/ruby_llm/contract/rspec.rb +6 -0
  75. data/lib/ruby_llm/contract/step/base.rb +138 -0
  76. data/lib/ruby_llm/contract/step/dsl.rb +144 -0
  77. data/lib/ruby_llm/contract/step/limit_checker.rb +64 -0
  78. data/lib/ruby_llm/contract/step/result.rb +38 -0
  79. data/lib/ruby_llm/contract/step/retry_executor.rb +90 -0
  80. data/lib/ruby_llm/contract/step/retry_policy.rb +76 -0
  81. data/lib/ruby_llm/contract/step/runner.rb +126 -0
  82. data/lib/ruby_llm/contract/step/trace.rb +70 -0
  83. data/lib/ruby_llm/contract/step.rb +10 -0
  84. data/lib/ruby_llm/contract/token_estimator.rb +19 -0
  85. data/lib/ruby_llm/contract/types.rb +11 -0
  86. data/lib/ruby_llm/contract/version.rb +7 -0
  87. data/lib/ruby_llm/contract.rb +108 -0
  88. data/ruby_llm-contract.gemspec +33 -0
  89. metadata +172 -0
@@ -0,0 +1,220 @@
1
+ # frozen_string_literal: true
2
+
3
+ # =============================================================================
4
+ # EXAMPLE 1: Thread Classification (PROMO / FILLER / SKIP)
5
+ #
6
+ # Real-world case: A Reddit promotion planner needs to classify threads
7
+ # into PROMO (worth commenting with a product link), FILLER (worth a
8
+ # genuine comment without product mention), or SKIP (irrelevant).
9
+ # =============================================================================
10
+
11
+ require_relative "../lib/ruby_llm/contract"
12
+
13
+ # =============================================================================
14
+ # BEFORE: Legacy approach (inline heredoc + ad-hoc validation)
15
+ # =============================================================================
16
+ #
17
+ # In the legacy codebase, this lives across multiple concern files:
18
+ # - classification_prompts.rb (prompt building)
19
+ # - thread_classification.rb (LLM calling + parsing)
20
+ # - llm_result_mapper.rb (ID matching with positional fallback)
21
+ #
22
+ # ```ruby
23
+ # # classification_prompts.rb
24
+ # def build_classify_prompt(items)
25
+ # <<~PROMPT
26
+ # #{classify_product_header}
27
+ # #{classify_sitemap_section}
28
+ # Classify each Reddit thread below for this product's promotion campaign.
29
+ #
30
+ # For each thread, decide:
31
+ # #{classify_decision_rules}
32
+ #
33
+ # IMPORTANT: Be careful with PROMO. Follow these rules:
34
+ # #{classify_promo_caution_rules}
35
+ #
36
+ # Also provide:
37
+ # #{classify_output_fields}
38
+ #
39
+ # Threads:
40
+ # #{items.to_json}
41
+ # PROMPT
42
+ # end
43
+ #
44
+ # # thread_classification.rb
45
+ # def classify_batch_via_llm(batch)
46
+ # items = build_classify_items(batch)
47
+ # prompt = build_classify_prompt(items)
48
+ # response = ai_call(prompt, schema: classify_response_schema)
49
+ # parsed = parse_llm_json(response)
50
+ # # Manual ID matching with positional fallback (masks errors!)
51
+ # map_llm_results_by_id(items, parsed["threads"])
52
+ # end
53
+ # ```
54
+ #
55
+ # PROBLEMS:
56
+ # - Prompt is a string concatenation of 6 helper methods
57
+ # - No contract on output — if model returns wrong enum, it silently propagates
58
+ # - ID matching has a positional fallback that masks when model rewrites IDs
59
+ # - No way to test prompt quality without hitting the API
60
+ # - Change one line in classify_promo_caution_rules → no idea what broke
61
+
62
+ # =============================================================================
63
+ # AFTER: ruby_llm-contract approach
64
+ # =============================================================================
65
+
66
+ class ClassifyThreads < RubyLLM::Contract::Step::Base
67
+ input_type RubyLLM::Contract::Types::Array.of(RubyLLM::Contract::Types::Hash)
68
+ output_type RubyLLM::Contract::Types::Array.of(RubyLLM::Contract::Types::Hash)
69
+
70
+ prompt do
71
+ system "You classify Reddit threads for a product promotion campaign."
72
+
73
+ rule "For each thread, classify as PROMO, FILLER, or SKIP."
74
+ rule "PROMO: thread author has a problem where the product helps naturally."
75
+ rule "FILLER: related to domain, good for a genuine comment without product mention."
76
+ rule "SKIP: irrelevant, low engagement, hostile to recommendations, grief/politics."
77
+ rule "Return a JSON array with id, classification, relevance_score (0-10), and thread_intent."
78
+ rule "thread_intent must be one of: seeking_help, sharing, discussion, venting."
79
+
80
+ section "SCORING GUIDE", <<~GUIDE
81
+ 8-10: Clear problem/situation the product solves
82
+ 5-7: Author is in target audience, link would fit naturally
83
+ 2-4: Same broad domain but weak connection
84
+ 0-1: Irrelevant
85
+ GUIDE
86
+
87
+ user "{input}"
88
+ end
89
+
90
+ # Structural: every input ID must appear in output
91
+ validate("all thread IDs must match input") do |output, input|
92
+ output.map { |r| r[:id] }.sort == input.map { |t| t[:id] }.sort
93
+ end
94
+
95
+ # Enum: classification must be valid
96
+ validate("classification must be PROMO, FILLER, or SKIP") do |output|
97
+ output.all? { |r| %w[PROMO FILLER SKIP].include?(r[:classification]) }
98
+ end
99
+
100
+ # Consistency: PROMO threads must have decent relevance
101
+ validate("PROMO threads must have relevance_score >= 5") do |output|
102
+ output.select { |r| r[:classification] == "PROMO" }
103
+ .all? { |r| r[:relevance_score].is_a?(Integer) && r[:relevance_score] >= 5 }
104
+ end
105
+
106
+ # Enum: thread_intent must be valid
107
+ validate("thread_intent must be valid") do |output|
108
+ valid = %w[seeking_help sharing discussion venting]
109
+ output.all? { |r| valid.include?(r[:thread_intent]) }
110
+ end
111
+ end
112
+
113
+ # =============================================================================
114
+ # AFTER + SCHEMA: output_schema replaces structural invariants
115
+ #
116
+ # Compare with the version above:
117
+ # - classification enum → schema
118
+ # - thread_intent enum → schema
119
+ # - relevance_score type/range → schema
120
+ # - ID matching → still an invariant (cross-validation with input)
121
+ # - PROMO score check → still an invariant (conditional logic)
122
+ # =============================================================================
123
+
# Schema-backed variant of thread classification.
#
# The output schema declares the :threads array, the classification and
# thread_intent enums, and the 0-10 range of relevance_score. Only checks that
# need the input or conditional logic remain as `validate` invariants.
class ClassifyThreadsWithSchema < RubyLLM::Contract::Step::Base
  input_type RubyLLM::Contract::Types::Array.of(RubyLLM::Contract::Types::Hash)

  output_schema do
    array :threads do
      string :id
      string :classification, enum: %w[PROMO FILLER SKIP]
      integer :relevance_score, minimum: 0, maximum: 10
      string :thread_intent, enum: %w[seeking_help sharing discussion venting]
    end
  end

  prompt do
    system "You classify Reddit threads for a product promotion campaign."

    rule "For each thread, classify as PROMO, FILLER, or SKIP."
    rule "PROMO: thread author has a problem where the product helps naturally."
    rule "FILLER: related to domain, good for a genuine comment without product mention."
    rule "SKIP: irrelevant, low engagement, hostile to recommendations, grief/politics."
    rule "Return JSON with a threads array. Each entry: id, classification, relevance_score (0-10), thread_intent."
    rule "thread_intent must be one of: seeking_help, sharing, discussion, venting."

    section "SCORING GUIDE", <<~GUIDE
      8-10: Clear problem/situation the product solves
      5-7: Author is in target audience, link would fit naturally
      2-4: Same broad domain but weak connection
      0-1: Irrelevant
    GUIDE

    user "{input}"
  end

  # Only custom business logic — structural constraints are in the schema.
  #
  # Cross-check against the input: the output must carry exactly the input IDs
  # (same multiplicity, any order). Occurrence counts are compared instead of
  # sorted arrays because the input hashes are unconstrained, and Array#sort
  # raises ArgumentError on nil or mixed-type IDs.
  validate("all thread IDs must match input") do |output, input|
    output[:threads].map { |r| r[:id] }.tally == input.map { |t| t[:id] }.tally
  end

  # Conditional rule: a PROMO classification requires a score of at least 5.
  validate("PROMO threads must have relevance_score >= 5") do |output|
    output[:threads].select { |r| r[:classification] == "PROMO" }
                    .all? { |r| r[:relevance_score] >= 5 }
  end
end
166
+
167
+ # =============================================================================
168
+ # DEMO: Run with test adapter
169
+ # =============================================================================
170
+
# Sample threads passed to every run below.
threads = [
  { id: "t1", subreddit: "crochet", title: "spent way too much on yarn this month lol", selftext: "anyone else?" },
  { id: "t2", subreddit: "gaming", title: "my cat destroyed my controller", selftext: "RIP" },
  { id: "t3", subreddit: "deals", title: "best craft supply deals?", selftext: "looking for yarn and fabric sales" }
]

# Builds one canned classification entry; extra keys are appended after the
# four standard ones.
entry = lambda do |id, classification, score, intent, **extra|
  { id: id, classification: classification, relevance_score: score, thread_intent: intent, **extra }
end

# Runs ClassifyThreads with a test adapter that replays +rows+ as JSON.
# Any keyword arguments are merged into the run context after the adapter.
classify = lambda do |rows, **context|
  canned = RubyLLM::Contract::Adapters::Test.new(response: rows.to_json)
  ClassifyThreads.run(threads, context: { adapter: canned, **context })
end

# Happy path — valid response
outcome = classify.call(
  [
    entry.call("t1", "PROMO", 7, "venting", matched_page: "/yarn-deals"),
    entry.call("t2", "SKIP", 1, "venting", matched_page: ""),
    entry.call("t3", "PROMO", 9, "seeking_help", matched_page: "/craft-deals")
  ],
  model: "gpt-5-mini"
)

puts "=== HAPPY PATH ==="
puts "Status: #{outcome.status}"
labels = outcome.parsed_output.map { |r| "#{r[:id]}=#{r[:classification]}" }
puts "Parsed output: #{labels.join(", ")}"
puts "Validation errors: #{outcome.validation_errors}"
puts

# Bad path — model returns wrong enum
outcome = classify.call(
  [
    entry.call("t1", "MAYBE", 7, "venting"),
    entry.call("t2", "SKIP", 1, "venting"),
    entry.call("t3", "PROMO", 9, "seeking_help")
  ]
)

puts "=== BAD ENUM ==="
puts "Status: #{outcome.status}"
puts "Validation errors: #{outcome.validation_errors}"
puts

# Bad path — model rewrites IDs (the silent bug legacy code masked with positional fallback)
outcome = classify.call(
  [
    entry.call("thread_1", "PROMO", 7, "venting"),
    entry.call("thread_2", "SKIP", 1, "venting"),
    entry.call("thread_3", "PROMO", 9, "seeking_help")
  ]
)

puts "=== REWRITTEN IDs (legacy code would silently fallback to positional matching) ==="
puts "Status: #{outcome.status}"
puts "Validation errors: #{outcome.validation_errors}"
@@ -0,0 +1,203 @@
1
+ # frozen_string_literal: true
2
+
3
+ # =============================================================================
4
+ # EXAMPLE 2: Promo Comment Generation
5
+ #
6
+ # Real-world case: Generate a Reddit comment that subtly promotes a product.
7
+ # The comment must match the thread's language, sound like a real user,
8
+ # include a product link naturally, and follow strict persona rules.
9
+ # =============================================================================
10
+
11
+ require_relative "../lib/ruby_llm/contract"
12
+
13
+ # =============================================================================
14
+ # BEFORE: Legacy approach (200+ lines across multiple concerns)
15
+ # =============================================================================
16
+ #
17
+ # In the legacy codebase, the prompt is assembled from 6+ helper methods
18
+ # across comment_prompts.rb (240 lines):
19
+ #
20
+ # ```ruby
21
+ # # System message built from 8 sections:
22
+ # def system_message_for_promo
23
+ # base_system_message(
24
+ # intro: "You write Reddit comments that subtly promote a product...",
25
+ # voice_lines: [
26
+ # "Sound like a genuine user who found something useful, not an ad.",
27
+ # 'Never say "I built" or "I made this".',
28
+ # "Sound like a real Reddit user: casual, no marketing speak...",
29
+ # # ... 10 more rules
30
+ # ],
31
+ # self_contained_lines: [...],
32
+ # extra_sections: [section("PROMO LINKING BASICS", [...])]
33
+ # )
34
+ # end
35
+ #
36
+ # # User prompt built by string concatenation:
37
+ # def build_promo_prompt(items, strict_language: false, comment_plan: nil)
38
+ # [
39
+ # <<~PRODUCT.strip,
40
+ # [PRODUCT]
41
+ # Domain: #{@url}
42
+ # PRODUCT
43
+ # pages_section,
44
+ # section("URL SELECTION", promo_url_selection_rules),
45
+ # section("PRODUCT MENTION", promo_product_mention_rules + [...]),
46
+ # comment_plan_section,
47
+ # comment_plan_rules,
48
+ # strict_lang_section,
49
+ # <<~ITEMS.strip
50
+ # [ITEMS]
51
+ # #{items.to_json}
52
+ # ITEMS
53
+ # ].compact.join("\n\n")
54
+ # end
55
+ # ```
56
+ #
57
+ # PROBLEMS:
58
+ # - 200+ lines of string building spread across 8+ methods
59
+ # - No validation on output — wrong language silently passes, caught later
60
+ # - Persona + voice + rules mixed with data (URL, pages, items)
61
+ # - Change one voice rule → no way to measure impact on output quality
62
+ # - The `.compact.join("\n\n")` pattern is fragile — easy to break structure
63
+
64
+ # =============================================================================
65
+ # AFTER: ruby_llm-contract approach
66
+ # =============================================================================
67
+
68
+ PERSONA = <<~PERSONA.strip # Persona text for the prompt's "PERSONA" section; trailing backslashes are heredoc line continuations.
69
+ You are a woman, 40+, a maker. You solve your own problems by building \
70
+ software. Outside of code you crochet, sew, 3D-print, and do \
71
+ astrophotography with your son. You hunt deals both offline and online. \
72
+ Your writing style: reflective, inventive, casual. You make typos, skip \
73
+ Polish diacritics sometimes, write like a real person on a forum.
74
+ PERSONA
75
+
# Generates a Reddit comment that mentions a product page in a natural voice.
#
# Input:  a hash with the thread (title, selftext, subreddit, language), the
#         target comment length, and the product/page URLs.
# Output: a hash; the validators read its :comment string.
#
# The prompt states the voice rules; the `validate` invariants below enforce
# the ones that can be checked mechanically.
class GeneratePromoComment < RubyLLM::Contract::Step::Base
  input_type RubyLLM::Contract::Types::Hash.schema(
    thread_title: RubyLLM::Contract::Types::String,
    thread_selftext: RubyLLM::Contract::Types::String,
    subreddit: RubyLLM::Contract::Types::String,
    target_length: RubyLLM::Contract::Types::Integer,
    thread_language: RubyLLM::Contract::Types::String,
    product_url: RubyLLM::Contract::Types::String,
    matched_page_url: RubyLLM::Contract::Types::String
  )
  output_type Hash

  prompt do
    system "You write Reddit comments that subtly promote a product. Return valid JSON only."

    section "PERSONA", PERSONA

    rule "Sound like a genuine user who found something useful, not an ad."
    rule 'Never say "I built" or "I made this".'
    rule "Casual tone, no marketing speak, no emojis, no bullet points."
    rule "Pick one specific angle and share it concretely."
    rule "Be opinionated; say what worked for you, not generic balanced advice."
    rule 'NEVER start with "Nice X", "Cool X", "Love this". Jump straight into your point.'
    rule "Give 2-3 options; the product link should be ONE of them, not the whole point."
    rule "The comment must stand without the link."
    rule 'Do not introduce the link with "PS:", "btw:", or parenthetical asides.'
    rule "No markdown headers or formatting. Plain text only."
    rule "Write in {thread_language}."
    rule "Approximately {target_length} characters (±20%)."

    section "PRODUCT", "Domain: {product_url}\nPage: {matched_page_url}"

    user "r/{subreddit}: {thread_title}\n\n{thread_selftext}\n\nWrite a helpful comment."
  end

  # The comment must be a String with more than 10 non-padding characters.
  validate("comment must not be empty") do |o|
    o[:comment].is_a?(String) && o[:comment].strip.length > 10
  end

  # Rejects any line that opens with one or more '#' followed by whitespace.
  # A single '#' is a markdown H1, so it is rejected as well as '##' and deeper.
  validate("no markdown headers") do |o|
    !o[:comment].to_s.match?(/^\#+\s/)
  end

  # Rejects characters in three Unicode ranges: U+1F600-1F64F, U+1F300-1F5FF
  # and U+1F680-1F6FF.
  validate("no emojis") do |o|
    !o[:comment].to_s.match?(/[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}]/)
  end

  # The matched page URL from the input must appear verbatim in the comment.
  validate("includes product link") do |o, input|
    o[:comment].to_s.include?(input[:matched_page_url])
  end

  # Deliberately looser than the ±20% requested in the prompt.
  validate("length within ±30% of target") do |o, input|
    len = o[:comment].to_s.length
    target = input[:target_length]
    len.between?((target * 0.7).to_i, (target * 1.3).to_i)
  end

  # Prefix check is case-sensitive and applies to the very first characters.
  validate("does not start with banned openings") do |o|
    banned = ["Nice ", "Cool ", "Love this", "Great ", "Totally agree"]
    banned.none? { |b| o[:comment].to_s.start_with?(b) }
  end
end
138
+
139
+ # =============================================================================
140
+ # DEMO: Run with test adapter
141
+ # =============================================================================
142
+
# Request shared by every run below.
request = {
  thread_title: "spent way too much on yarn this month lol",
  thread_selftext: "Between Drops and the new Scheepjes line I'm broke. Anyone else track their spending?",
  subreddit: "crochet",
  target_length: 200,
  thread_language: "en",
  product_url: "https://deals.example.com",
  matched_page_url: "https://deals.example.com/yarn-deals"
}

# Runs GeneratePromoComment with a test adapter that replays +text+ as the
# JSON-encoded :comment field.
generate = lambda do |text|
  canned = RubyLLM::Contract::Adapters::Test.new(response: { comment: text }.to_json)
  GeneratePromoComment.run(request, context: { adapter: canned })
end

# Happy path — good comment
good_text = "Ugh same. I started tracking last year and the numbers were brutal. " \
            "What helped — monthly yarn budget plus checking https://deals.example.com/yarn-deals " \
            "before impulse buying. Ravelry destash groups too."

outcome = generate.call(good_text)

puts "=== HAPPY PATH ==="
puts "Status: #{outcome.status}"
puts "Comment: #{outcome.parsed_output[:comment]}"
puts "Validation errors: #{outcome.validation_errors}"
puts

# Bad path — starts with banned opening
banned_opening_text = "Nice question! I track my yarn spending with a spreadsheet and also check " \
                      "https://deals.example.com/yarn-deals for sales."

outcome = generate.call(banned_opening_text)

puts "=== BANNED OPENING ==="
puts "Status: #{outcome.status}"
puts "Validation errors: #{outcome.validation_errors}"
puts

# Bad path — missing product link
linkless_text = "Same here. I started a spreadsheet and realized I spent way more than I thought. " \
                "Ravelry destash groups are great for cheap yarn though."

outcome = generate.call(linkless_text)

puts "=== MISSING LINK ==="
puts "Status: #{outcome.status}"
puts "Validation errors: #{outcome.validation_errors}"
puts

# Inspect the rendered prompt AST (fresh run with the good comment)
puts "=== RENDERED PROMPT (first 3 messages) ==="
outcome = generate.call(good_text)
outcome.trace[:messages].first(3).each do |message|
  role = message[:role]
  preview = message[:content][0..80]
  puts " [#{role}] #{preview}..."
end
@@ -0,0 +1,201 @@
1
+ # frozen_string_literal: true
2
+
3
+ # =============================================================================
4
+ # EXAMPLE 3: Target Audience Generation
5
+ #
6
+ # Real-world case: Analyze a product URL and generate audience profiles.
7
+ # This is stage 1 of the pipeline — if it fails, everything downstream
8
+ # breaks (subreddit discovery, thread classification, comment generation).
9
+ # =============================================================================
10
+
11
+ require_relative "../lib/ruby_llm/contract"
12
+
13
+ # =============================================================================
14
+ # BEFORE: Legacy approach (prompt + schema in concern, ad-hoc validation)
15
+ # =============================================================================
16
+ #
17
+ # In the legacy codebase, this is in target_audience_prompts.rb:
18
+ #
19
+ # ```ruby
20
+ # def build_audience_profile_prompt(plan, pages)
21
+ # <<~PROMPT
22
+ # Analyze this webpage. First, understand what the product/service does.
23
+ # Then figure out who the TARGET AUDIENCE is.
24
+ #
25
+ # #{product_input_context(plan, pages)}
26
+ #
27
+ # ---
28
+ #
29
+ # Generate:
30
+ # 1. LOCALE: Detect the page language. Return ISO 639-1 code.
31
+ # 2. DESCRIPTION: Write exactly 1 sentence (max 15 words): WHAT it is.
32
+ # 3. Identify 2-3 distinct target audience groups.
33
+ #
34
+ # CRITICAL: Describe groups by their LIFE SITUATION and EVERYDAY PROBLEMS...
35
+ # [... 40 more lines of instructions ...]
36
+ # PROMPT
37
+ # end
38
+ #
39
+ # # Validation is ad-hoc, buried in the caller:
40
+ # def valid_product_context?(context)
41
+ # context.is_a?(Hash) &&
42
+ # context["locale"].present? &&
43
+ # context["description"].present? &&
44
+ # context["groups"].is_a?(Array) &&
45
+ # context["groups"].size >= 1
46
+ # end
47
+ # ```
48
+ #
49
+ # PROBLEMS:
50
+ # - 50-line heredoc string — impossible to diff meaningfully
51
+ # - Validation is a separate method, easy to forget to call
52
+ # - If locale is wrong (e.g. "english" instead of "en"), it passes validation
53
+ # - If groups are present but empty/garbage, no way to catch it
54
+ # - Failure here silently poisons all 6 downstream stages
55
+
56
+ # =============================================================================
57
+ # AFTER: ruby_llm-contract approach
58
+ # =============================================================================
59
+
# Turns a product page (URL, body text, sitemap pages) into audience profiles.
#
# The validators read :locale, :description and :groups from the output hash,
# and :who, :use_cases and :good_fit_threads from each group.
class GenerateTargetAudience < RubyLLM::Contract::Step::Base
  input_type RubyLLM::Contract::Types::Hash.schema(
    url: RubyLLM::Contract::Types::String,
    body_text: RubyLLM::Contract::Types::String,
    sitemap_pages: RubyLLM::Contract::Types::Array.of(RubyLLM::Contract::Types::Hash)
  )
  output_type Hash

  prompt do
    system "Analyze a product webpage and generate target audience profiles."

    rule "Detect page language, return ISO 639-1 code (e.g. 'en', 'pl', 'de')."
    rule "Write product description in exactly 1 sentence, max 15 words. Say WHAT, not HOW."
    rule "Identify 2-3 distinct audience groups based on LIFE SITUATION, not product jargon."
    rule "Write 'who' as if YOU are that person posting on Reddit, not a marketer."
    rule "Return JSON with locale, description, and groups array."

    section "GOOD vs BAD EXAMPLES", <<~EXAMPLES
      Good "who": "I'm 30, trying to lose weight but I hate counting calories"
      Bad "who": "Adults 25-55 who buy specialty outdoor gear occasionally"

      Good use_case: "I keep checking 5 different shops and it takes forever"
      Bad use_case: "Track shop promotions across retailers"

      Good thread: "spent way too much on yarn this month lol"
      Bad thread: "budgeting for craft supplies"
    EXAMPLES

    user "URL: {url}\n\nBODY TEXT:\n{body_text}\n\nSITEMAP PAGES:\n{sitemap_pages}"
  end

  # Exactly two lowercase ASCII letters, anchored to the whole string.
  validate("locale is valid ISO 639-1") do |out|
    code = out[:locale]
    code.is_a?(String) && code.match?(/\A[a-z]{2}\z/)
  end

  # More than 5 characters after stripping, and at most 20 words.
  validate("description is present and concise") do |out|
    text = out[:description].to_s.strip
    text.length > 5 && text.split.size <= 20
  end

  # :groups must be an Array holding between 1 and 4 entries.
  validate("has 1-4 audience groups") do |out|
    groups = out[:groups]
    next false unless groups.is_a?(Array)

    groups.size.between?(1, 4)
  end

  # Every group's :who must exceed 10 characters once stripped.
  validate("each group has who field") do |out|
    groups = out[:groups]
    next false unless groups.is_a?(Array)

    groups.all? { |group| group[:who].to_s.strip.length > 10 }
  end

  # Every group needs an Array of at least two use cases.
  validate("each group has use_cases") do |out|
    groups = out[:groups]
    next false unless groups.is_a?(Array)

    groups.all? do |group|
      cases = group[:use_cases]
      cases.is_a?(Array) && cases.size >= 2
    end
  end

  # Every group needs an Array of at least two good-fit thread examples.
  validate("each group has good_fit_threads") do |out|
    groups = out[:groups]
    next false unless groups.is_a?(Array)

    groups.all? do |group|
      fits = group[:good_fit_threads]
      fits.is_a?(Array) && fits.size >= 2
    end
  end
end
118
+
119
+ # =============================================================================
120
+ # DEMO: Run with test adapter — showing cascade failure prevention
121
+ # =============================================================================
122
+
# Product page shared by every run below.
page = {
  url: "https://deals.example.com",
  body_text: "Track deals from niche online shops. Get alerts for price drops on craft supplies, " \
             "hobby gear, and specialty items. We monitor 200+ small retailers daily.",
  sitemap_pages: [
    { url: "/yarn-deals", title: "Yarn & Crochet Deals", description: "Sales on yarn, hooks, patterns" },
    { url: "/gaming-deals", title: "Gaming Merch Deals", description: "Gaming accessories and merch sales" }
  ]
}

# Runs GenerateTargetAudience with a test adapter that replays +payload+ as JSON.
audience_for = lambda do |payload|
  canned = RubyLLM::Contract::Adapters::Test.new(response: payload.to_json)
  GenerateTargetAudience.run(page, context: { adapter: canned })
end

# Happy path — good audience profile
crafter = {
  who: "I'm a crafter who spends too much on supplies every month and my partner is getting annoyed.",
  use_cases: ["I keep checking 5 different yarn shops", "I always find out about sales after they end"],
  not_covered: ["Groceries and food delivery", "Air travel"],
  good_fit_threads: ["spent way too much on yarn this month lol", "anyone else feel guilty about hobby spending?"],
  bad_fit_threads: ["best grocery cashback apps", "cheap flight deals"]
}

gamer = {
  who: "I'm a gamer building my setup on a budget and I hate paying full price for peripherals.",
  use_cases: ["I want to know when my wishlist items go on sale",
              "Small shops have better deals but I can't check them all"],
  not_covered: ["Digital game keys", "Streaming subscriptions"],
  good_fit_threads: ["just got into minipainting and my wallet hurts", "budget gaming setup thread"],
  bad_fit_threads: ["best game pass deals", "Netflix vs Disney+"]
}

outcome = audience_for.call(
  locale: "en",
  description: "Deals aggregator for niche online shops.",
  groups: [crafter, gamer]
)

puts "=== HAPPY PATH ==="
puts "Status: #{outcome.status}"
profile = outcome.parsed_output
puts "Locale: #{profile[:locale]}"
puts "Description: #{profile[:description]}"
puts "Groups: #{profile[:groups].size}"
puts "Validation errors: #{outcome.validation_errors}"
puts

# Bad path — invalid locale (legacy code would let "english" pass)
thin_group = { who: "A crafter", use_cases: ["buying yarn"], not_covered: [], good_fit_threads: ["yarn deals"],
               bad_fit_threads: [] }

outcome = audience_for.call(
  locale: "english", # Should be "en", not "english"
  description: "Deals aggregator for niche online shops.",
  groups: [thin_group]
)

puts "=== BAD LOCALE (legacy code would let this pass) ==="
puts "Status: #{outcome.status}"
puts "Validation errors: #{outcome.validation_errors}"
puts

# Bad path — empty/garbage groups (cascade failure source)
outcome = audience_for.call(
  locale: "en",
  description: "A website.",
  groups: []
)

puts "=== EMPTY GROUPS (would poison all downstream stages) ==="
puts "Status: #{outcome.status}"
puts "Validation errors: #{outcome.validation_errors}"
puts

# The key insight: in a pipeline, a failed result stops the downstream stages
puts "=== CASCADE PREVENTION ==="
puts "if result.failed? → don't run SearchExpansion, ThreadClassification, CommentGeneration"
puts "Legacy code would silently pass bad data to 6 more LLM calls, wasting tokens and producing garbage."