ruby_llm-contract 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +55 -0
  4. data/CHANGELOG.md +76 -0
  5. data/Gemfile +11 -0
  6. data/Gemfile.lock +176 -0
  7. data/LICENSE +21 -0
  8. data/README.md +154 -0
  9. data/Rakefile +8 -0
  10. data/examples/00_basics.rb +500 -0
  11. data/examples/01_classify_threads.rb +220 -0
  12. data/examples/02_generate_comment.rb +203 -0
  13. data/examples/03_target_audience.rb +201 -0
  14. data/examples/04_real_llm.rb +410 -0
  15. data/examples/05_output_schema.rb +258 -0
  16. data/examples/07_keyword_extraction.rb +239 -0
  17. data/examples/08_translation.rb +353 -0
  18. data/examples/09_eval_dataset.rb +287 -0
  19. data/examples/10_reddit_full_showcase.rb +363 -0
  20. data/examples/README.md +140 -0
  21. data/lib/ruby_llm/contract/adapters/base.rb +13 -0
  22. data/lib/ruby_llm/contract/adapters/response.rb +17 -0
  23. data/lib/ruby_llm/contract/adapters/ruby_llm.rb +94 -0
  24. data/lib/ruby_llm/contract/adapters/test.rb +44 -0
  25. data/lib/ruby_llm/contract/adapters.rb +6 -0
  26. data/lib/ruby_llm/contract/concerns/deep_symbolize.rb +17 -0
  27. data/lib/ruby_llm/contract/concerns/eval_host.rb +109 -0
  28. data/lib/ruby_llm/contract/concerns/trace_equality.rb +15 -0
  29. data/lib/ruby_llm/contract/concerns/usage_aggregator.rb +43 -0
  30. data/lib/ruby_llm/contract/configuration.rb +21 -0
  31. data/lib/ruby_llm/contract/contract/definition.rb +39 -0
  32. data/lib/ruby_llm/contract/contract/invariant.rb +23 -0
  33. data/lib/ruby_llm/contract/contract/parser.rb +143 -0
  34. data/lib/ruby_llm/contract/contract/schema_validator.rb +239 -0
  35. data/lib/ruby_llm/contract/contract/validator.rb +104 -0
  36. data/lib/ruby_llm/contract/contract.rb +7 -0
  37. data/lib/ruby_llm/contract/cost_calculator.rb +38 -0
  38. data/lib/ruby_llm/contract/dsl.rb +13 -0
  39. data/lib/ruby_llm/contract/errors.rb +19 -0
  40. data/lib/ruby_llm/contract/eval/case_result.rb +76 -0
  41. data/lib/ruby_llm/contract/eval/contract_detail_builder.rb +47 -0
  42. data/lib/ruby_llm/contract/eval/dataset.rb +53 -0
  43. data/lib/ruby_llm/contract/eval/eval_definition.rb +112 -0
  44. data/lib/ruby_llm/contract/eval/evaluation_result.rb +27 -0
  45. data/lib/ruby_llm/contract/eval/evaluator/exact.rb +20 -0
  46. data/lib/ruby_llm/contract/eval/evaluator/json_includes.rb +58 -0
  47. data/lib/ruby_llm/contract/eval/evaluator/proc_evaluator.rb +40 -0
  48. data/lib/ruby_llm/contract/eval/evaluator/regex.rb +27 -0
  49. data/lib/ruby_llm/contract/eval/model_comparison.rb +80 -0
  50. data/lib/ruby_llm/contract/eval/pipeline_result_adapter.rb +15 -0
  51. data/lib/ruby_llm/contract/eval/report.rb +115 -0
  52. data/lib/ruby_llm/contract/eval/runner.rb +162 -0
  53. data/lib/ruby_llm/contract/eval/trait_evaluator.rb +75 -0
  54. data/lib/ruby_llm/contract/eval.rb +16 -0
  55. data/lib/ruby_llm/contract/pipeline/base.rb +62 -0
  56. data/lib/ruby_llm/contract/pipeline/result.rb +131 -0
  57. data/lib/ruby_llm/contract/pipeline/runner.rb +139 -0
  58. data/lib/ruby_llm/contract/pipeline/trace.rb +72 -0
  59. data/lib/ruby_llm/contract/pipeline.rb +6 -0
  60. data/lib/ruby_llm/contract/prompt/ast.rb +38 -0
  61. data/lib/ruby_llm/contract/prompt/builder.rb +47 -0
  62. data/lib/ruby_llm/contract/prompt/node.rb +25 -0
  63. data/lib/ruby_llm/contract/prompt/nodes/example_node.rb +27 -0
  64. data/lib/ruby_llm/contract/prompt/nodes/rule_node.rb +15 -0
  65. data/lib/ruby_llm/contract/prompt/nodes/section_node.rb +26 -0
  66. data/lib/ruby_llm/contract/prompt/nodes/system_node.rb +15 -0
  67. data/lib/ruby_llm/contract/prompt/nodes/user_node.rb +15 -0
  68. data/lib/ruby_llm/contract/prompt/nodes.rb +7 -0
  69. data/lib/ruby_llm/contract/prompt/renderer.rb +76 -0
  70. data/lib/ruby_llm/contract/railtie.rb +20 -0
  71. data/lib/ruby_llm/contract/rake_task.rb +78 -0
  72. data/lib/ruby_llm/contract/rspec/pass_eval.rb +96 -0
  73. data/lib/ruby_llm/contract/rspec/satisfy_contract.rb +31 -0
  74. data/lib/ruby_llm/contract/rspec.rb +6 -0
  75. data/lib/ruby_llm/contract/step/base.rb +138 -0
  76. data/lib/ruby_llm/contract/step/dsl.rb +144 -0
  77. data/lib/ruby_llm/contract/step/limit_checker.rb +64 -0
  78. data/lib/ruby_llm/contract/step/result.rb +38 -0
  79. data/lib/ruby_llm/contract/step/retry_executor.rb +90 -0
  80. data/lib/ruby_llm/contract/step/retry_policy.rb +76 -0
  81. data/lib/ruby_llm/contract/step/runner.rb +126 -0
  82. data/lib/ruby_llm/contract/step/trace.rb +70 -0
  83. data/lib/ruby_llm/contract/step.rb +10 -0
  84. data/lib/ruby_llm/contract/token_estimator.rb +19 -0
  85. data/lib/ruby_llm/contract/types.rb +11 -0
  86. data/lib/ruby_llm/contract/version.rb +7 -0
  87. data/lib/ruby_llm/contract.rb +108 -0
  88. data/ruby_llm-contract.gemspec +33 -0
  89. metadata +172 -0
@@ -0,0 +1,239 @@
1
+ # frozen_string_literal: true
2
+
3
+ # =============================================================================
4
+ # EXAMPLE 7: Keyword Extraction with probability scoring
5
+ #
6
+ # One article in, up to 15 keywords out — each with a relevance
7
+ # probability. Schema enforces structure (array bounds, number range).
8
+ # Invariants enforce logic (sorted, no duplicates, keywords from text).
9
+ #
10
+ # Shows:
11
+ # - Array output_schema with nested objects
12
+ # - min_items / max_items constraints
13
+ # - number range (probability 0.0–1.0)
14
+ # - Invariant: sorted order (schema can't express this)
15
+ # - Invariant: uniqueness (schema can't express this)
16
+ # - Invariant: cross-validation — keywords must appear in source text
17
+ # - retry_policy for model escalation
18
+ # =============================================================================
19
+
20
+ require_relative "../lib/ruby_llm/contract"
21
+
22
+ # =============================================================================
23
+ # STEP DEFINITION
24
+ # =============================================================================
25
+
26
# Step: extract up to 15 keywords from an article, each scored with a
# relevance probability. The schema enforces structure (array bounds,
# number range); invariants enforce what the schema cannot express —
# ordering, uniqueness, and grounding in the source text.
class ExtractKeywords < RubyLLM::Contract::Step::Base
  input_type String

  output_schema do
    array :keywords, min_items: 1, max_items: 15 do
      string :keyword, description: "1-3 word keyword or phrase"
      number :probability, minimum: 0.0, maximum: 1.0
    end
  end

  prompt do
    system "Extract the most relevant keywords from the article."
    rule "Return up to 15 keywords, each with a relevance probability (0.0 to 1.0)."
    rule "Sort by probability descending (most relevant first)."
    rule "Each keyword must be 1-3 words."
    rule "Keywords must actually appear in or directly relate to the text."

    example input: "Ruby on Rails is a web framework written in Ruby.",
            output: '{"keywords":[{"keyword":"Ruby on Rails","probability":0.95},{"keyword":"web framework","probability":0.85},{"keyword":"Ruby","probability":0.75}]}'

    user "{input}"
  end

  # Ordering: every adjacent pair of probabilities must be non-increasing.
  validate("sorted by probability descending") do |out|
    scores = out[:keywords].map { |entry| entry[:probability] }
    scores.each_cons(2).all? { |a, b| a >= b }
  end

  # Uniqueness is case-insensitive: "Ruby" and "ruby" count as duplicates.
  validate("no duplicate keywords") do |out|
    normalized = out[:keywords].map { |entry| entry[:keyword].downcase.strip }
    normalized.size == normalized.uniq.size
  end

  # Cross-validation against the input: at least 70% of the keywords must
  # literally occur in the source text (guards against hallucination).
  validate("keywords relate to source text") do |output, input|
    haystack = input.downcase
    found = output[:keywords].count { |entry| haystack.include?(entry[:keyword].downcase) }
    found >= (output[:keywords].length * 0.7).ceil
  end

  # Escalate to a stronger model when the cheap one fails validation.
  retry_policy models: %w[gpt-4.1-nano gpt-4.1-mini]
end
67
+
68
+ # =============================================================================
69
+ # TEST WITH CANNED RESPONSES
70
+ # =============================================================================
71
+
72
# Article fixture fed to every ExtractKeywords run below.
article = <<~ARTICLE
  Artificial intelligence is transforming the way developers build software.
  Machine learning models, particularly large language models like GPT and Claude,
  are being integrated into development workflows for code generation, testing,
  and documentation. Ruby developers are adopting gems like ruby_llm to interact
  with these models through a clean API. The challenge remains in ensuring output
  quality — without contracts and validation, LLM responses can hallucinate or
  produce structurally invalid data that breaks downstream systems.
ARTICLE

banner = "=" * 60
puts banner
puts "KEYWORD EXTRACTION"
puts banner

# Happy path — sorted, unique, grounded keywords.
good_response = {
  keywords: [
    { keyword: "artificial intelligence", probability: 0.95 },
    { keyword: "machine learning", probability: 0.90 },
    { keyword: "large language models", probability: 0.88 },
    { keyword: "Ruby developers", probability: 0.82 },
    { keyword: "code generation", probability: 0.78 },
    { keyword: "output quality", probability: 0.72 },
    { keyword: "ruby_llm", probability: 0.70 },
    { keyword: "LLM responses", probability: 0.65 },
    { keyword: "validation", probability: 0.60 }
  ]
}.to_json

adapter = RubyLLM::Contract::Adapters::Test.new(response: good_response)
result = ExtractKeywords.run(article, context: { adapter: adapter })

puts "\n--- Happy path ---"
puts "Status: #{result.status}"
result.parsed_output[:keywords].each do |kw|
  gauge = "#" * (kw[:probability] * 20).round
  puts " #{kw[:probability].to_s.ljust(5)} #{gauge.ljust(20)} #{kw[:keyword]}"
end

# Helper: run the step against a canned (deliberately bad) response.
run_canned = lambda do |canned|
  ExtractKeywords.run(article, context: { adapter: RubyLLM::Contract::Adapters::Test.new(response: canned) })
end

# Unsorted probabilities — schema passes, ordering invariant fails.
puts "\n--- Invariant catches: unsorted ---"
unsorted = {
  keywords: [
    { keyword: "Ruby", probability: 0.60 },
    { keyword: "AI", probability: 0.95 },
    { keyword: "testing", probability: 0.80 }
  ]
}.to_json

r2 = run_canned.call(unsorted)
puts "Status: #{r2.status}"
puts "Errors: #{r2.validation_errors}"

# Case-insensitive duplicates — uniqueness invariant fails.
puts "\n--- Invariant catches: duplicates ---"
dupes = {
  keywords: [
    { keyword: "machine learning", probability: 0.95 },
    { keyword: "Machine Learning", probability: 0.90 },
    { keyword: "AI", probability: 0.80 }
  ]
}.to_json

r3 = run_canned.call(dupes)
puts "Status: #{r3.status}"
puts "Errors: #{r3.validation_errors}"

# Keywords absent from the article — grounding invariant fails.
puts "\n--- Invariant catches: hallucinated keywords ---"
hallucinated = {
  keywords: [
    { keyword: "blockchain", probability: 0.95 },
    { keyword: "cryptocurrency", probability: 0.90 },
    { keyword: "NFT marketplace", probability: 0.85 },
    { keyword: "artificial intelligence", probability: 0.80 }
  ]
}.to_json

r4 = run_canned.call(hallucinated)
puts "Status: #{r4.status}"
puts "Errors: #{r4.validation_errors}"
153
+
154
+ # =============================================================================
155
+ # PIPELINE: Article → Keywords → Related Topics
156
+ # =============================================================================
157
+
158
# Section banner for the pipeline demo below.
puts "\n\n#{"=" * 60}"
puts "PIPELINE: Article → Keywords → Related Topics"
puts "=" * 60
161
+
162
# Step: turn extracted keywords into 3-5 related article topic suggestions.
# Takes the parsed output of ExtractKeywords (a Hash) as input.
class SuggestRelatedTopics < RubyLLM::Contract::Step::Base
  input_type Hash

  output_schema do
    array :topics, min_items: 3, max_items: 5 do
      string :title
      string :angle, description: "Unique angle or hook for the topic"
    end
  end

  prompt do
    system "Suggest related article topics based on the extracted keywords."
    rule "Each topic must have a unique angle, not just repeat the keywords."
    rule "Topics should be interesting to the same audience."
    user "Keywords: {keywords}"
  end

  # Titles must be distinct, ignoring case.
  validate("topics have unique titles") do |out|
    seen = out[:topics].map { |topic| topic[:title].downcase }
    seen.size == seen.uniq.size
  end

  # An "angle" shorter than five words is too thin to be a real hook.
  validate("angles are substantive") do |out|
    out[:topics].all? { |topic| topic[:angle].to_s.split.length >= 5 }
  end
end
188
+
189
# Two-step pipeline: article text → keywords → related topic suggestions.
# Each step's parsed output feeds the next; results are addressable by
# the :keywords / :topics labels.
class ArticlePipeline < RubyLLM::Contract::Pipeline::Base
  step ExtractKeywords, as: :keywords
  step SuggestRelatedTopics, as: :topics
end
193
+
194
# Canned SuggestRelatedTopics response: three distinct titles, each with
# a >= 5-word angle, satisfying both invariants.
topics_response = {
  topics: [
    { title: "Building LLM-Powered Ruby Gems",
      angle: "How to structure a Ruby gem that wraps LLM APIs with type safety" },
    { title: "Contract-First AI Development",
      angle: "Why treating LLM outputs like API responses improves reliability" },
    { title: "Testing AI Features Without API Calls",
      angle: "Deterministic testing patterns for LLM integrations using canned adapters" }
  ]
}.to_json

adapter_kw = RubyLLM::Contract::Adapters::Test.new(response: good_response)
adapter_tp = RubyLLM::Contract::Adapters::Test.new(response: topics_response)

# Run the two steps by hand so each gets its own canned adapter.
r_kw = ExtractKeywords.run(article, context: { adapter: adapter_kw })
r_tp = SuggestRelatedTopics.run(r_kw.parsed_output, context: { adapter: adapter_tp })

puts "\nKeywords → Topics pipeline:"
puts " Keywords: #{r_kw.parsed_output[:keywords].length} extracted"
puts " Topics:"
r_tp.parsed_output[:topics].each do |topic|
  puts " #{topic[:title]}"
  puts " → #{topic[:angle]}"
end
219
+
220
+ # =============================================================================
221
+ # SUMMARY
222
+ #
223
+ # Schema handles:
224
+ # - Array with 1-15 items (min_items, max_items)
225
+ # - Each item has keyword (string) + probability (number 0.0-1.0)
226
+ #
227
+ # Invariants handle:
228
+ # - Sorted by probability (schema can't express ordering)
229
+ # - No duplicates (schema can't express uniqueness)
230
+ # - Keywords from source text (schema can't see input)
231
+ #
232
+ # Pipeline:
233
+ # - Extract keywords → suggest related topics
234
+ # - Each step has its own schema + invariants
235
+ #
236
+ # Model escalation:
237
+ # - retry_policy models: %w[gpt-4.1-nano gpt-4.1-mini]
238
+ # - If nano returns unsorted or hallucinated keywords, mini retries
239
+ # =============================================================================
@@ -0,0 +1,353 @@
1
+ # frozen_string_literal: true
2
+
3
+ # =============================================================================
4
+ # EXAMPLE 8: Translation pipeline with quality checks
5
+ #
6
+ # Real-world case: translate product page segments preserving tone,
7
+ # length constraints, and key terms. Pipeline:
8
+ #
9
+ # 1. Extract — find translatable segments with context and max length
10
+ # 2. Translate — translate each segment respecting constraints
11
+ # 3. Review — quality-check translations (detect untranslated terms,
12
+ # length violations, tone mismatches)
13
+ #
14
+ # Shows:
15
+ # - Pipeline where each step has a fundamentally different LLM skill
16
+ # (analysis → creative writing → evaluation)
17
+ # - Cross-validation: all segment keys from step 1 must appear in step 2
18
+ # - 2-arity invariant: max_length from extraction enforced on translations
19
+ # - Content quality: detect untranslated source terms left in output
20
+ # - Why 3 steps can't be 1: same model evaluating its own translation
21
+ # has self-evaluation bias — step 3 should ideally use a different model
22
+ # =============================================================================
23
+
24
+ require_relative "../lib/ruby_llm/contract"
25
+
26
+ # =============================================================================
27
+ # STEP 1: Extract translatable segments
28
+ #
29
+ # Input: raw product page text
30
+ # Output: structured segments with context, importance, and max length
31
+ # =============================================================================
32
+
33
# Step 1 — analysis: find translatable segments in raw page text and
# attach the metadata (context, tone, max_length) the later steps depend on.
class ExtractSegments < RubyLLM::Contract::Step::Base
  input_type RubyLLM::Contract::Types::Hash.schema(
    page_text: RubyLLM::Contract::Types::String,
    target_lang: RubyLLM::Contract::Types::String
  )

  output_schema do
    string :source_lang
    string :target_lang
    array :segments, min_items: 1 do
      string :key, description: "Unique identifier like hero_headline, cta_button"
      string :text, description: "Original text to translate"
      string :context, enum: %w[headline subheadline description cta legal testimonial]
      integer :max_length, description: "Max character count for the translation"
      string :tone, enum: %w[punchy professional casual formal technical]
    end
  end

  prompt do
    system "Extract translatable text segments from a product page."
    rule "Assign each segment a unique key based on its role (e.g., hero_headline, cta_primary)."
    rule "Determine context type and appropriate tone for translation."
    rule "Set max_length based on UI constraints — headlines short, descriptions longer."

    example input: "Ship faster. The deployment platform for modern teams. Try free →",
            output: '{"source_lang":"en","target_lang":"fr","segments":[' \
                    '{"key":"hero_headline","text":"Ship faster","context":"headline","max_length":20,"tone":"punchy"},' \
                    '{"key":"hero_sub","text":"The deployment platform for modern teams","context":"subheadline","max_length":60,"tone":"professional"},' \
                    '{"key":"cta_primary","text":"Try free →","context":"cta","max_length":15,"tone":"punchy"}]}'

    user "Target language: {target_lang}\n\nPage text:\n{page_text}"
  end

  # The model must not silently switch target language.
  validate("target_lang preserved") do |output, input|
    output[:target_lang] == input[:target_lang]
  end

  # Keys act as identifiers for downstream joins, so they must be unique.
  validate("unique segment keys") do |out|
    ids = out[:segments].map { |seg| seg[:key] }
    ids.size == ids.uniq.size
  end
end
75
+
76
+ # =============================================================================
77
+ # STEP 2: Translate segments
78
+ #
79
+ # Input: extracted segments with context and constraints
80
+ # Output: translated segments preserving keys and respecting max_length
81
+ # =============================================================================
82
+
83
# Step 2 — creative: translate each segment, honouring tone and length
# constraints, and carry the extraction metadata through for the review step.
class TranslateSegments < RubyLLM::Contract::Step::Base
  input_type Hash

  output_schema do
    string :source_lang
    string :target_lang
    array :translations, min_items: 1 do
      string :key
      string :original
      string :translated
      string :context, enum: %w[headline subheadline description cta legal testimonial]
      integer :max_length, description: "Carried through from extraction for downstream validation"
      integer :original_length
      integer :translated_length
    end
  end

  prompt do
    system "Translate product page segments to the target language."
    rule "Preserve tone: headlines punchy, CTAs action-oriented, descriptions natural."
    rule "Respect max_length — abbreviate naturally if needed, never truncate mid-word."
    rule "Keep brand names, product names, and URLs untranslated."
    rule "Carry through max_length from the input segments."
    rule "Include original and translated length for quality tracking."
    user "Source: {source_lang} → Target: {target_lang}\n\nSegments:\n{segments}"
  end

  # Cross-validation with step 1: the set of translated keys must match
  # the set of extracted keys exactly — nothing dropped, nothing invented.
  validate("all segments translated") do |output, input|
    expected = (input[:segments] || []).map { |seg| seg[:key] }
    actual = output[:translations].map { |tr| tr[:key] }
    actual.sort == expected.sort
  end

  # 2-arity invariant: enforce each segment's max_length from step 1
  # against the actual translated string.
  validate("translations within max_length") do |output, input|
    limits = (input[:segments] || []).to_h { |seg| [seg[:key], seg] }
    output[:translations].all? do |tr|
      cap = limits.dig(tr[:key], :max_length)
      cap.nil? || tr[:translated].to_s.length <= cap
    end
  end

  # A translation identical to its source was probably echoed back untranslated.
  validate("translations differ from originals") do |out|
    out[:translations].none? { |tr| tr[:translated] == tr[:original] }
  end

  retry_policy models: %w[gpt-4.1-nano gpt-4.1-mini]
end
129
+
130
+ # =============================================================================
131
+ # STEP 3: Review translation quality
132
+ #
133
+ # Input: original segments + translations
134
+ # Output: quality report with per-segment scores and issues
135
+ #
136
+ # This step uses a DIFFERENT LLM skill (evaluation, not generation).
137
+ # A model reviewing its own translations has bias — in production,
138
+ # you'd use a different model or temperature for this step.
139
+ # =============================================================================
140
+
141
# Step 3 — evaluation: quality-check the translations. This is a different
# LLM skill than step 2; in production you'd run it with a different model
# (or temperature) to avoid self-evaluation bias.
class ReviewTranslations < RubyLLM::Contract::Step::Base
  input_type Hash

  output_schema do
    string :target_lang
    integer :total_segments
    integer :passed_segments
    array :reviews, min_items: 1 do
      string :key
      string :verdict, enum: %w[pass warning fail]
      string :issue, description: "Empty if pass, description if warning/fail"
    end
  end

  prompt do
    system "Review translations for quality. You are a professional translator and editor."
    rule "Check each translation for: accuracy, natural phrasing, tone match, length vs max_length."
    rule "Pass: translation is accurate, natural, and within max_length."
    rule "Warning: minor issue (slightly awkward phrasing, could be improved)."
    rule "Fail: wrong meaning, untranslated text left in, or translated_length exceeds max_length."
    user "Target language: {target_lang}\n\nTranslations:\n{translations}"
  end

  # Every translation key must be reviewed exactly once.
  validate("all translations reviewed") do |output, input|
    reviewed = output[:reviews].map { |rev| rev[:key] }
    expected = (input[:translations] || []).map { |tr| tr[:key] }
    reviewed.sort == expected.sort
  end

  # passed_segments must agree with the actual number of "pass" verdicts.
  validate("counts are consistent") do |out|
    out[:passed_segments] == out[:reviews].count { |rev| rev[:verdict] == "pass" }
  end

  # A warning/fail verdict without an explanation is useless to a human.
  validate("failed reviews have issues") do |out|
    out[:reviews]
      .reject { |rev| rev[:verdict] == "pass" }
      .all? { |rev| !rev[:issue].to_s.strip.empty? }
  end

  # If the carried-through lengths prove a limit violation, the reviewer
  # may not hand out a clean "pass".
  validate("fail verdict for over-limit translations") do |output, input|
    by_key = (input[:translations] || []).to_h { |tr| [tr[:key], tr] }
    output[:reviews].all? do |rev|
      tr = by_key[rev[:key]]
      next true unless tr && tr[:max_length] && tr[:translated_length]
      next true if tr[:translated_length] <= tr[:max_length]

      %w[warning fail].include?(rev[:verdict])
    end
  end
end
190
+
191
+ # =============================================================================
192
+ # PIPELINE
193
+ # =============================================================================
194
+
195
# Three-step pipeline: extract → translate → review. Step results are
# addressable via the :extract / :translate / :review labels.
class TranslationPipeline < RubyLLM::Contract::Pipeline::Base
  step ExtractSegments, as: :extract
  step TranslateSegments, as: :translate
  step ReviewTranslations, as: :review
end
200
+
201
+ # =============================================================================
202
+ # TEST WITH CANNED RESPONSES
203
+ # =============================================================================
204
+
205
# Raw product page used as pipeline input.
page_text = <<~PAGE
  Ship faster with Acme Deploy

  The deployment platform built for modern engineering teams.
  Push to production in seconds, not hours. Zero-downtime deploys,
  instant rollbacks, and real-time logs.

  Start free — no credit card required.

  "Acme Deploy cut our deployment time from 45 minutes to 30 seconds."
  — Sarah Chen, CTO at Widgets Inc.
PAGE

input = { page_text: page_text, target_lang: "fr" }

# Canned step-1 output: six segments, each with context/tone/max_length.
extract_response = {
  source_lang: "en", target_lang: "fr",
  segments: [
    { key: "hero_headline", text: "Ship faster with Acme Deploy", context: "headline", max_length: 40, tone: "punchy" },
    { key: "hero_sub", text: "The deployment platform built for modern engineering teams", context: "subheadline",
      max_length: 80, tone: "professional" },
    { key: "feature_1", text: "Push to production in seconds, not hours", context: "description", max_length: 60,
      tone: "punchy" },
    { key: "feature_2", text: "Zero-downtime deploys, instant rollbacks, and real-time logs", context: "description",
      max_length: 80, tone: "technical" },
    { key: "cta_primary", text: "Start free — no credit card required", context: "cta", max_length: 50,
      tone: "punchy" },
    { key: "testimonial", text: "Acme Deploy cut our deployment time from 45 minutes to 30 seconds.",
      context: "testimonial", max_length: 100, tone: "formal" }
  ]
}.to_json

# Canned step-2 output: every extracted key translated, all lengths
# within their per-segment limits (so the happy-path run passes).
translate_response = {
  source_lang: "en", target_lang: "fr",
  translations: [
    { key: "hero_headline", original: "Ship faster with Acme Deploy",
      translated: "Déployez plus vite avec Acme Deploy", context: "headline", max_length: 40, original_length: 29, translated_length: 36 },
    { key: "hero_sub", original: "The deployment platform built for modern engineering teams",
      translated: "La plateforme de déploiement pour les équipes d'ingénierie modernes", context: "subheadline", max_length: 80, original_length: 57, translated_length: 67 },
    { key: "feature_1", original: "Push to production in seconds, not hours",
      translated: "En production en secondes, pas en heures", context: "description", max_length: 60, original_length: 41, translated_length: 41 },
    { key: "feature_2", original: "Zero-downtime deploys, instant rollbacks, and real-time logs",
      translated: "Déploiements sans interruption, rollbacks instantanés et logs en temps réel", context: "description", max_length: 80, original_length: 60, translated_length: 75 },
    { key: "cta_primary", original: "Start free — no credit card required",
      translated: "Essai gratuit — sans carte bancaire", context: "cta", max_length: 50, original_length: 36, translated_length: 36 },
    { key: "testimonial", original: "Acme Deploy cut our deployment time from 45 minutes to 30 seconds.",
      translated: "Acme Deploy a réduit notre temps de déploiement de 45 minutes à 30 secondes.", context: "testimonial", max_length: 100, original_length: 66, translated_length: 76 }
  ]
}.to_json

# Canned step-3 output: 5 passes + 1 warning; counts consistent with verdicts.
review_response = {
  target_lang: "fr", total_segments: 6, passed_segments: 5,
  reviews: [
    { key: "hero_headline", verdict: "pass", issue: "" },
    { key: "hero_sub", verdict: "pass", issue: "" },
    { key: "feature_1", verdict: "pass", issue: "" },
    { key: "feature_2", verdict: "warning", issue: "Slightly long — consider shorter phrasing for mobile" },
    { key: "cta_primary", verdict: "pass", issue: "" },
    { key: "testimonial", verdict: "pass", issue: "" }
  ]
}.to_json
266
+
267
divider = "=" * 60
puts divider
puts "TRANSLATION PIPELINE: en → fr"
puts divider

# Each step below gets its own canned adapter.
puts "\n--- Step 1: Extract segments ---"
r1 = ExtractSegments.run(input, context: { adapter: RubyLLM::Contract::Adapters::Test.new(response: extract_response) })
puts "Status: #{r1.status} | Segments: #{r1.parsed_output[:segments].length}"
r1.parsed_output[:segments].each do |seg|
  puts " #{seg[:key].ljust(16)} [#{seg[:context].ljust(12)}] #{seg[:text][0..50]}... (max: #{seg[:max_length]})"
end
278
+
279
puts "\n--- Step 2: Translate ---"
r2 = TranslateSegments.run(r1.parsed_output,
                           context: { adapter: RubyLLM::Contract::Adapters::Test.new(response: translate_response) })
puts "Status: #{r2.status}"
r2.parsed_output[:translations].each do |t|
  # BUG FIX: was `t[:translated_length] <= 80` — a hard-coded limit that
  # ignored each segment's own max_length (e.g. hero_headline allows only
  # 40 chars, testimonial allows 100). Compare against the per-segment
  # limit carried through the schema exactly for this purpose.
  len_ok = t[:translated_length] <= t[:max_length] ? "✓" : "⚠"
  puts " #{len_ok} #{t[:key].ljust(16)} #{t[:translated][0..60]}"
end
287
+
288
puts "\n--- Step 3: Review ---"
r3 = ReviewTranslations.run(r2.parsed_output,
                            context: { adapter: RubyLLM::Contract::Adapters::Test.new(response: review_response) })
puts "Status: #{r3.status} | Passed: #{r3.parsed_output[:passed_segments]}/#{r3.parsed_output[:total_segments]}"
# Verdict → glyph map, hoisted out of the loop.
icons = { "pass" => "✓", "warning" => "⚠", "fail" => "✗" }
r3.parsed_output[:reviews].each do |rev|
  row = " #{icons[rev[:verdict]]} #{rev[:key]}"
  row += " — #{rev[:issue]}" unless rev[:issue].to_s.empty?
  puts row
end
298
+
299
# =============================================================================
# INVARIANT CATCHES
# =============================================================================

# Helper: rerun step 2 against step 1's real output with a canned response.
translate_canned = lambda do |canned|
  TranslateSegments.run(r1.parsed_output,
                        context: { adapter: RubyLLM::Contract::Adapters::Test.new(response: canned) })
end

# Only one of six segments translated — "all segments translated" fails.
puts "\n\n--- Invariant catches: missing translation ---"
incomplete = {
  source_lang: "en", target_lang: "fr",
  translations: [
    { key: "hero_headline", original: "Ship faster", translated: "Déployez vite", context: "headline", original_length: 11, translated_length: 13 }
    # Missing 5 other segments!
  ]
}.to_json

r_bad = translate_canned.call(incomplete)
puts "Status: #{r_bad.status}"
puts "Errors: #{r_bad.validation_errors}"

# Headline blown past its 40-char limit — "translations within max_length" fails.
puts "\n--- Invariant catches: translation too long ---"
too_long = translate_response.gsub(
  "Déployez plus vite avec Acme Deploy",
  "Déployez beaucoup plus rapidement et efficacement avec la plateforme Acme Deploy"
)
r_long = translate_canned.call(too_long)
puts "Status: #{r_long.status}"
puts "Errors: #{r_long.validation_errors}"

# CTA echoed back in English — "translations differ from originals" fails.
puts "\n--- Invariant catches: untranslated (echoed back) ---"
echoed = translate_response.gsub("Essai gratuit — sans carte bancaire", "Start free — no credit card required")
r_echo = translate_canned.call(echoed)
puts "Status: #{r_echo.status}"
puts "Errors: #{r_echo.validation_errors}"
333
+
334
+ # =============================================================================
335
+ # SUMMARY
336
+ #
337
+ # 3 steps, 3 different LLM skills:
338
+ # 1. Extract (analysis) — find segments, assign context and constraints
339
+ # 2. Translate (creative) — translate respecting tone and length
340
+ # 3. Review (evaluation) — quality-check each translation
341
+ #
342
+ # Why 3 steps, not 1:
343
+ # - Each step has focused attention and its own schema
344
+ # - Step 3 evaluates step 2's work (shouldn't self-evaluate)
345
+ # - If extraction fails, no tokens wasted on translation
346
+ # - Each step independently testable and retryable
347
+ #
348
+ # Invariants catch:
349
+ # - Missing translations (not all segments covered)
350
+ # - Translation too long (exceeds max_length from step 1)
351
+ # - Untranslated text (model echoed back original)
352
+ # - Review inconsistency (counts don't match verdicts)
353
+ # =============================================================================