ruby_llm-contract 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +96 -0
- data/Gemfile.lock +3 -3
- data/README.md +64 -316
- data/examples/00_basics.rb +110 -428
- data/examples/01_fallback_showcase.rb +208 -0
- data/examples/02_real_llm_minimal.rb +45 -0
- data/examples/03_summarize_with_keywords.rb +128 -0
- data/examples/04_summarize_and_translate.rb +196 -0
- data/examples/05_eval_dataset.rb +144 -0
- data/examples/06_retry_variants.rb +147 -0
- data/examples/README.md +20 -128
- data/lib/ruby_llm/contract/adapters/ruby_llm.rb +22 -1
- data/lib/ruby_llm/contract/cost_calculator.rb +39 -0
- data/lib/ruby_llm/contract/eval/model_comparison.rb +4 -4
- data/lib/ruby_llm/contract/eval/retry_optimizer.rb +7 -3
- data/lib/ruby_llm/contract/step/base.rb +18 -1
- data/lib/ruby_llm/contract/step/dsl.rb +38 -0
- data/lib/ruby_llm/contract/step/limit_checker.rb +2 -2
- data/lib/ruby_llm/contract/token_estimator.rb +20 -3
- data/lib/ruby_llm/contract/version.rb +1 -1
- data/ruby_llm-contract.gemspec +6 -5
- metadata +14 -16
- data/examples/01_classify_threads.rb +0 -220
- data/examples/02_generate_comment.rb +0 -203
- data/examples/03_target_audience.rb +0 -201
- data/examples/04_real_llm.rb +0 -410
- data/examples/05_output_schema.rb +0 -258
- data/examples/07_keyword_extraction.rb +0 -239
- data/examples/08_translation.rb +0 -353
- data/examples/09_eval_dataset.rb +0 -287
- data/examples/10_reddit_full_showcase.rb +0 -363
|
@@ -200,6 +200,44 @@ module RubyLLM
|
|
|
200
200
|
superclass.temperature if superclass.respond_to?(:temperature)
|
|
201
201
|
end
|
|
202
202
|
|
|
203
|
+
def thinking(effort: nil, budget: nil)
|
|
204
|
+
if effort == :default
|
|
205
|
+
@thinking = nil
|
|
206
|
+
@thinking_explicitly_unset = true
|
|
207
|
+
return nil
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
if effort || budget
|
|
211
|
+
@thinking_explicitly_unset = false
|
|
212
|
+
return @thinking = { effort: effort, budget: budget }.compact
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
return @thinking if defined?(@thinking) && !@thinking_explicitly_unset
|
|
216
|
+
return nil if @thinking_explicitly_unset
|
|
217
|
+
|
|
218
|
+
superclass.thinking if superclass.respond_to?(:thinking)
|
|
219
|
+
end
|
|
220
|
+
|
|
221
|
+
def reasoning_effort(value = nil)
|
|
222
|
+
return (thinking && thinking[:effort]) if value.nil?
|
|
223
|
+
|
|
224
|
+
# Alias is scoped to the effort dimension only. `:default` on the
|
|
225
|
+
# alias clears effort but PRESERVES any previously-set budget — the
|
|
226
|
+
# name does not suggest "wipe the whole thinking config." Use the
|
|
227
|
+
# full `thinking(effort: :default)` to clear everything.
|
|
228
|
+
if value == :default
|
|
229
|
+
current_budget = thinking && thinking[:budget]
|
|
230
|
+
if current_budget
|
|
231
|
+
@thinking_explicitly_unset = false
|
|
232
|
+
@thinking = { budget: current_budget }
|
|
233
|
+
return nil
|
|
234
|
+
end
|
|
235
|
+
return thinking(effort: :default)
|
|
236
|
+
end
|
|
237
|
+
|
|
238
|
+
thinking(effort: value)
|
|
239
|
+
end
|
|
240
|
+
|
|
203
241
|
def around_call(&block)
|
|
204
242
|
if block
|
|
205
243
|
return @around_call = block
|
|
@@ -22,7 +22,7 @@ module RubyLLM
|
|
|
22
22
|
def collect_limit_errors(estimated)
|
|
23
23
|
errors = []
|
|
24
24
|
if max_input && estimated > max_input
|
|
25
|
-
errors << "Input token limit exceeded: estimated #{estimated} tokens, max #{max_input}"
|
|
25
|
+
errors << "Input token limit exceeded: estimated #{estimated} tokens (heuristic ±30%), max #{max_input}"
|
|
26
26
|
end
|
|
27
27
|
append_cost_error(estimated, errors) if max_cost
|
|
28
28
|
errors
|
|
@@ -46,7 +46,7 @@ module RubyLLM
|
|
|
46
46
|
handle_unknown_pricing(errors)
|
|
47
47
|
elsif estimated_cost > max_cost
|
|
48
48
|
errors << "Cost limit exceeded: estimated $#{format("%.6f", estimated_cost)} " \
|
|
49
|
-
"(#{estimated} input + #{estimated_output} output tokens), " \
|
|
49
|
+
"(#{estimated} input + #{estimated_output} output tokens, heuristic ±30%), " \
|
|
50
50
|
"max $#{format("%.6f", max_cost)}"
|
|
51
51
|
end
|
|
52
52
|
end
|
|
@@ -2,12 +2,29 @@
|
|
|
2
2
|
|
|
3
3
|
module RubyLLM
|
|
4
4
|
module Contract
|
|
5
|
+
# Pre-flight token estimation for `max_input` / `max_cost` budget gating.
|
|
6
|
+
#
|
|
7
|
+
# IMPORTANT — heuristic only. This is NOT an accurate tokenizer.
|
|
8
|
+
# The estimate uses a fixed `length / CHARS_PER_TOKEN` ratio:
|
|
9
|
+
#
|
|
10
|
+
# - Accurate to ±30% for English prose with mainstream OpenAI / Anthropic models
|
|
11
|
+
# - Worse for non-English text, code, structured data, and unusual scripts
|
|
12
|
+
# - Useless for models with very different tokenizers (e.g. some open-source models)
|
|
13
|
+
#
|
|
14
|
+
# RubyLLM 1.14 ships no pre-flight tokenizer either; once the API call
|
|
15
|
+
# returns, `RubyLLM::Tokens` provides accurate counts from provider usage
|
|
16
|
+
# data. This estimator is for the *pre-flight refusal* path only — its job
|
|
17
|
+
# is to answer "is this call almost certainly within budget?" with enough
|
|
18
|
+
# accuracy that runaway prompts get caught, while accepting that the
|
|
19
|
+
# boundary cases will be wrong.
|
|
20
|
+
#
|
|
21
|
+
# Refusal messages from `LimitChecker` carry an "(heuristic)" suffix so
|
|
22
|
+
# adopters know the number is estimated, not measured.
|
|
5
23
|
module TokenEstimator
|
|
6
|
-
# Heuristic: ~4 characters per token for English text.
|
|
7
|
-
# This is a rough estimate — actual tokenization varies by model and content.
|
|
8
|
-
# Intentionally conservative (overestimates slightly) to avoid surprise costs.
|
|
9
24
|
CHARS_PER_TOKEN = 4
|
|
10
25
|
|
|
26
|
+
# Heuristic estimate. Returns an integer token count.
|
|
27
|
+
# See module docstring for accuracy caveats.
|
|
11
28
|
def self.estimate(messages)
|
|
12
29
|
return 0 unless messages.is_a?(Array)
|
|
13
30
|
|
data/ruby_llm-contract.gemspec
CHANGED
|
@@ -7,10 +7,11 @@ Gem::Specification.new do |spec|
|
|
|
7
7
|
spec.version = RubyLLM::Contract::VERSION
|
|
8
8
|
spec.authors = ["Justyna"]
|
|
9
9
|
|
|
10
|
-
spec.summary = "
|
|
11
|
-
spec.description = "
|
|
12
|
-
"
|
|
13
|
-
"
|
|
10
|
+
spec.summary = "Contracts + Evals for ruby_llm"
|
|
11
|
+
spec.description = "Wraps RubyLLM::Chat with input/output contracts, business-rule validation, " \
|
|
12
|
+
"retry with model escalation on validation failure, pre-flight cost ceilings, " \
|
|
13
|
+
"and an evaluation framework. Sibling abstraction to RubyLLM::Agent — same " \
|
|
14
|
+
"niche (reusable class-based prompts), wider contract."
|
|
14
15
|
spec.homepage = "https://github.com/justi/ruby_llm-contract"
|
|
15
16
|
spec.license = "MIT"
|
|
16
17
|
spec.required_ruby_version = ">= 3.2.0"
|
|
@@ -30,6 +31,6 @@ Gem::Specification.new do |spec|
|
|
|
30
31
|
spec.require_paths = ["lib"]
|
|
31
32
|
|
|
32
33
|
spec.add_dependency "dry-types", "~> 1.7"
|
|
33
|
-
spec.add_dependency "ruby_llm", "~> 1.
|
|
34
|
+
spec.add_dependency "ruby_llm", "~> 1.12"
|
|
34
35
|
spec.add_dependency "ruby_llm-schema", "~> 0.3"
|
|
35
36
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ruby_llm-contract
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.7.1
|
|
4
|
+
version: 0.8.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Justyna
|
|
@@ -29,14 +29,14 @@ dependencies:
|
|
|
29
29
|
requirements:
|
|
30
30
|
- - "~>"
|
|
31
31
|
- !ruby/object:Gem::Version
|
|
32
|
-
version: '1.
|
|
32
|
+
version: '1.12'
|
|
33
33
|
type: :runtime
|
|
34
34
|
prerelease: false
|
|
35
35
|
version_requirements: !ruby/object:Gem::Requirement
|
|
36
36
|
requirements:
|
|
37
37
|
- - "~>"
|
|
38
38
|
- !ruby/object:Gem::Version
|
|
39
|
-
version: '1.
|
|
39
|
+
version: '1.12'
|
|
40
40
|
- !ruby/object:Gem::Dependency
|
|
41
41
|
name: ruby_llm-schema
|
|
42
42
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -51,9 +51,10 @@ dependencies:
|
|
|
51
51
|
- - "~>"
|
|
52
52
|
- !ruby/object:Gem::Version
|
|
53
53
|
version: '0.3'
|
|
54
|
-
description:
|
|
55
|
-
|
|
56
|
-
|
|
54
|
+
description: Wraps RubyLLM::Chat with input/output contracts, business-rule validation,
|
|
55
|
+
retry with model escalation on validation failure, pre-flight cost ceilings, and
|
|
56
|
+
an evaluation framework. Sibling abstraction to RubyLLM::Agent — same niche (reusable
|
|
57
|
+
class-based prompts), wider contract.
|
|
57
58
|
executables: []
|
|
58
59
|
extensions: []
|
|
59
60
|
extra_rdoc_files: []
|
|
@@ -69,15 +70,12 @@ files:
|
|
|
69
70
|
- README.md
|
|
70
71
|
- Rakefile
|
|
71
72
|
- examples/00_basics.rb
|
|
72
|
-
- examples/01_classify_threads.rb
|
|
73
|
-
- examples/02_generate_comment.rb
|
|
74
|
-
- examples/03_target_audience.rb
|
|
75
|
-
- examples/04_real_llm.rb
|
|
76
|
-
- examples/05_output_schema.rb
|
|
77
|
-
- examples/07_keyword_extraction.rb
|
|
78
|
-
- examples/08_translation.rb
|
|
79
|
-
- examples/09_eval_dataset.rb
|
|
80
|
-
- examples/10_reddit_full_showcase.rb
|
|
73
|
+
- examples/01_fallback_showcase.rb
|
|
74
|
+
- examples/02_real_llm_minimal.rb
|
|
75
|
+
- examples/03_summarize_with_keywords.rb
|
|
76
|
+
- examples/04_summarize_and_translate.rb
|
|
77
|
+
- examples/05_eval_dataset.rb
|
|
78
|
+
- examples/06_retry_variants.rb
|
|
81
79
|
- examples/README.md
|
|
82
80
|
- lib/ruby_llm/contract.rb
|
|
83
81
|
- lib/ruby_llm/contract/adapters.rb
|
|
@@ -208,5 +206,5 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
208
206
|
requirements: []
|
|
209
207
|
rubygems_version: 3.6.7
|
|
210
208
|
specification_version: 4
|
|
211
|
-
summary:
|
|
209
|
+
summary: Contracts + Evals for ruby_llm
|
|
212
210
|
test_files: []
|
|
@@ -1,220 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
# =============================================================================
|
|
4
|
-
# EXAMPLE 1: Thread Classification (PROMO / FILLER / SKIP)
|
|
5
|
-
#
|
|
6
|
-
# Real-world case: A Reddit promotion planner needs to classify threads
|
|
7
|
-
# into PROMO (worth commenting with a product link), FILLER (worth a
|
|
8
|
-
# genuine comment without product mention), or SKIP (irrelevant).
|
|
9
|
-
# =============================================================================
|
|
10
|
-
|
|
11
|
-
require_relative "../lib/ruby_llm/contract"
|
|
12
|
-
|
|
13
|
-
# =============================================================================
|
|
14
|
-
# BEFORE: Legacy approach (inline heredoc + ad-hoc validation)
|
|
15
|
-
# =============================================================================
|
|
16
|
-
#
|
|
17
|
-
# In the legacy codebase, this lives across multiple concern files:
|
|
18
|
-
# - classification_prompts.rb (prompt building)
|
|
19
|
-
# - thread_classification.rb (LLM calling + parsing)
|
|
20
|
-
# - llm_result_mapper.rb (ID matching with positional fallback)
|
|
21
|
-
#
|
|
22
|
-
# ```ruby
|
|
23
|
-
# # classification_prompts.rb
|
|
24
|
-
# def build_classify_prompt(items)
|
|
25
|
-
# <<~PROMPT
|
|
26
|
-
# #{classify_product_header}
|
|
27
|
-
# #{classify_sitemap_section}
|
|
28
|
-
# Classify each Reddit thread below for this product's promotion campaign.
|
|
29
|
-
#
|
|
30
|
-
# For each thread, decide:
|
|
31
|
-
# #{classify_decision_rules}
|
|
32
|
-
#
|
|
33
|
-
# IMPORTANT: Be careful with PROMO. Follow these rules:
|
|
34
|
-
# #{classify_promo_caution_rules}
|
|
35
|
-
#
|
|
36
|
-
# Also provide:
|
|
37
|
-
# #{classify_output_fields}
|
|
38
|
-
#
|
|
39
|
-
# Threads:
|
|
40
|
-
# #{items.to_json}
|
|
41
|
-
# PROMPT
|
|
42
|
-
# end
|
|
43
|
-
#
|
|
44
|
-
# # thread_classification.rb
|
|
45
|
-
# def classify_batch_via_llm(batch)
|
|
46
|
-
# items = build_classify_items(batch)
|
|
47
|
-
# prompt = build_classify_prompt(items)
|
|
48
|
-
# response = ai_call(prompt, schema: classify_response_schema)
|
|
49
|
-
# parsed = parse_llm_json(response)
|
|
50
|
-
# # Manual ID matching with positional fallback (masks errors!)
|
|
51
|
-
# map_llm_results_by_id(items, parsed["threads"])
|
|
52
|
-
# end
|
|
53
|
-
# ```
|
|
54
|
-
#
|
|
55
|
-
# PROBLEMS:
|
|
56
|
-
# - Prompt is a string concatenation of 6 helper methods
|
|
57
|
-
# - No contract on output — if model returns wrong enum, it silently propagates
|
|
58
|
-
# - ID matching has a positional fallback that masks when model rewrites IDs
|
|
59
|
-
# - No way to test prompt quality without hitting the API
|
|
60
|
-
# - Change one line in classify_promo_caution_rules → no idea what broke
|
|
61
|
-
|
|
62
|
-
# =============================================================================
|
|
63
|
-
# AFTER: ruby_llm-contract approach
|
|
64
|
-
# =============================================================================
|
|
65
|
-
|
|
66
|
-
class ClassifyThreads < RubyLLM::Contract::Step::Base
|
|
67
|
-
input_type RubyLLM::Contract::Types::Array.of(RubyLLM::Contract::Types::Hash)
|
|
68
|
-
output_type RubyLLM::Contract::Types::Array.of(RubyLLM::Contract::Types::Hash)
|
|
69
|
-
|
|
70
|
-
prompt do
|
|
71
|
-
system "You classify Reddit threads for a product promotion campaign."
|
|
72
|
-
|
|
73
|
-
rule "For each thread, classify as PROMO, FILLER, or SKIP."
|
|
74
|
-
rule "PROMO: thread author has a problem where the product helps naturally."
|
|
75
|
-
rule "FILLER: related to domain, good for a genuine comment without product mention."
|
|
76
|
-
rule "SKIP: irrelevant, low engagement, hostile to recommendations, grief/politics."
|
|
77
|
-
rule "Return a JSON array with id, classification, relevance_score (0-10), and thread_intent."
|
|
78
|
-
rule "thread_intent must be one of: seeking_help, sharing, discussion, venting."
|
|
79
|
-
|
|
80
|
-
section "SCORING GUIDE", <<~GUIDE
|
|
81
|
-
8-10: Clear problem/situation the product solves
|
|
82
|
-
5-7: Author is in target audience, link would fit naturally
|
|
83
|
-
2-4: Same broad domain but weak connection
|
|
84
|
-
0-1: Irrelevant
|
|
85
|
-
GUIDE
|
|
86
|
-
|
|
87
|
-
user "{input}"
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
# Structural: every input ID must appear in output
|
|
91
|
-
validate("all thread IDs must match input") do |output, input|
|
|
92
|
-
output.map { |r| r[:id] }.sort == input.map { |t| t[:id] }.sort
|
|
93
|
-
end
|
|
94
|
-
|
|
95
|
-
# Enum: classification must be valid
|
|
96
|
-
validate("classification must be PROMO, FILLER, or SKIP") do |output|
|
|
97
|
-
output.all? { |r| %w[PROMO FILLER SKIP].include?(r[:classification]) }
|
|
98
|
-
end
|
|
99
|
-
|
|
100
|
-
# Consistency: PROMO threads must have decent relevance
|
|
101
|
-
validate("PROMO threads must have relevance_score >= 5") do |output|
|
|
102
|
-
output.select { |r| r[:classification] == "PROMO" }
|
|
103
|
-
.all? { |r| r[:relevance_score].is_a?(Integer) && r[:relevance_score] >= 5 }
|
|
104
|
-
end
|
|
105
|
-
|
|
106
|
-
# Enum: thread_intent must be valid
|
|
107
|
-
validate("thread_intent must be valid") do |output|
|
|
108
|
-
valid = %w[seeking_help sharing discussion venting]
|
|
109
|
-
output.all? { |r| valid.include?(r[:thread_intent]) }
|
|
110
|
-
end
|
|
111
|
-
end
|
|
112
|
-
|
|
113
|
-
# =============================================================================
|
|
114
|
-
# AFTER + SCHEMA: output_schema replaces structural invariants
|
|
115
|
-
#
|
|
116
|
-
# Compare with the version above:
|
|
117
|
-
# - classification enum → schema
|
|
118
|
-
# - thread_intent enum → schema
|
|
119
|
-
# - relevance_score type/range → schema
|
|
120
|
-
# - ID matching → still an invariant (cross-validation with input)
|
|
121
|
-
# - PROMO score check → still an invariant (conditional logic)
|
|
122
|
-
# =============================================================================
|
|
123
|
-
|
|
124
|
-
class ClassifyThreadsWithSchema < RubyLLM::Contract::Step::Base
|
|
125
|
-
input_type RubyLLM::Contract::Types::Array.of(RubyLLM::Contract::Types::Hash)
|
|
126
|
-
|
|
127
|
-
output_schema do
|
|
128
|
-
array :threads do
|
|
129
|
-
string :id
|
|
130
|
-
string :classification, enum: %w[PROMO FILLER SKIP]
|
|
131
|
-
integer :relevance_score, minimum: 0, maximum: 10
|
|
132
|
-
string :thread_intent, enum: %w[seeking_help sharing discussion venting]
|
|
133
|
-
end
|
|
134
|
-
end
|
|
135
|
-
|
|
136
|
-
prompt do
|
|
137
|
-
system "You classify Reddit threads for a product promotion campaign."
|
|
138
|
-
|
|
139
|
-
rule "For each thread, classify as PROMO, FILLER, or SKIP."
|
|
140
|
-
rule "PROMO: thread author has a problem where the product helps naturally."
|
|
141
|
-
rule "FILLER: related to domain, good for a genuine comment without product mention."
|
|
142
|
-
rule "SKIP: irrelevant, low engagement, hostile to recommendations, grief/politics."
|
|
143
|
-
rule "Return JSON with a threads array. Each entry: id, classification, relevance_score (0-10), thread_intent."
|
|
144
|
-
rule "thread_intent must be one of: seeking_help, sharing, discussion, venting."
|
|
145
|
-
|
|
146
|
-
section "SCORING GUIDE", <<~GUIDE
|
|
147
|
-
8-10: Clear problem/situation the product solves
|
|
148
|
-
5-7: Author is in target audience, link would fit naturally
|
|
149
|
-
2-4: Same broad domain but weak connection
|
|
150
|
-
0-1: Irrelevant
|
|
151
|
-
GUIDE
|
|
152
|
-
|
|
153
|
-
user "{input}"
|
|
154
|
-
end
|
|
155
|
-
|
|
156
|
-
# Only custom business logic — structural constraints are in the schema
|
|
157
|
-
validate("all thread IDs must match input") do |output, input|
|
|
158
|
-
output[:threads].map { |r| r[:id] }.sort == input.map { |t| t[:id] }.sort
|
|
159
|
-
end
|
|
160
|
-
|
|
161
|
-
validate("PROMO threads must have relevance_score >= 5") do |output|
|
|
162
|
-
output[:threads].select { |r| r[:classification] == "PROMO" }
|
|
163
|
-
.all? { |r| r[:relevance_score] >= 5 }
|
|
164
|
-
end
|
|
165
|
-
end
|
|
166
|
-
|
|
167
|
-
# =============================================================================
|
|
168
|
-
# DEMO: Run with test adapter
|
|
169
|
-
# =============================================================================
|
|
170
|
-
|
|
171
|
-
sample_threads = [
|
|
172
|
-
{ id: "t1", subreddit: "crochet", title: "spent way too much on yarn this month lol", selftext: "anyone else?" },
|
|
173
|
-
{ id: "t2", subreddit: "gaming", title: "my cat destroyed my controller", selftext: "RIP" },
|
|
174
|
-
{ id: "t3", subreddit: "deals", title: "best craft supply deals?", selftext: "looking for yarn and fabric sales" }
|
|
175
|
-
]
|
|
176
|
-
|
|
177
|
-
# Happy path — valid response
|
|
178
|
-
valid_response = [
|
|
179
|
-
{ id: "t1", classification: "PROMO", relevance_score: 7, thread_intent: "venting", matched_page: "/yarn-deals" },
|
|
180
|
-
{ id: "t2", classification: "SKIP", relevance_score: 1, thread_intent: "venting", matched_page: "" },
|
|
181
|
-
{ id: "t3", classification: "PROMO", relevance_score: 9, thread_intent: "seeking_help", matched_page: "/craft-deals" }
|
|
182
|
-
].to_json
|
|
183
|
-
|
|
184
|
-
adapter = RubyLLM::Contract::Adapters::Test.new(response: valid_response)
|
|
185
|
-
result = ClassifyThreads.run(sample_threads, context: { adapter: adapter, model: "gpt-5-mini" })
|
|
186
|
-
|
|
187
|
-
puts "=== HAPPY PATH ==="
|
|
188
|
-
puts "Status: #{result.status}"
|
|
189
|
-
puts "Parsed output: #{result.parsed_output.map { |r| "#{r[:id]}=#{r[:classification]}" }.join(", ")}"
|
|
190
|
-
puts "Validation errors: #{result.validation_errors}"
|
|
191
|
-
puts
|
|
192
|
-
|
|
193
|
-
# Bad path — model returns wrong enum
|
|
194
|
-
bad_response = [
|
|
195
|
-
{ id: "t1", classification: "MAYBE", relevance_score: 7, thread_intent: "venting" },
|
|
196
|
-
{ id: "t2", classification: "SKIP", relevance_score: 1, thread_intent: "venting" },
|
|
197
|
-
{ id: "t3", classification: "PROMO", relevance_score: 9, thread_intent: "seeking_help" }
|
|
198
|
-
].to_json
|
|
199
|
-
|
|
200
|
-
bad_adapter = RubyLLM::Contract::Adapters::Test.new(response: bad_response)
|
|
201
|
-
result = ClassifyThreads.run(sample_threads, context: { adapter: bad_adapter })
|
|
202
|
-
|
|
203
|
-
puts "=== BAD ENUM ==="
|
|
204
|
-
puts "Status: #{result.status}"
|
|
205
|
-
puts "Validation errors: #{result.validation_errors}"
|
|
206
|
-
puts
|
|
207
|
-
|
|
208
|
-
# Bad path — model rewrites IDs (the silent bug legacy code masked with positional fallback)
|
|
209
|
-
rewritten_ids_response = [
|
|
210
|
-
{ id: "thread_1", classification: "PROMO", relevance_score: 7, thread_intent: "venting" },
|
|
211
|
-
{ id: "thread_2", classification: "SKIP", relevance_score: 1, thread_intent: "venting" },
|
|
212
|
-
{ id: "thread_3", classification: "PROMO", relevance_score: 9, thread_intent: "seeking_help" }
|
|
213
|
-
].to_json
|
|
214
|
-
|
|
215
|
-
rewritten_adapter = RubyLLM::Contract::Adapters::Test.new(response: rewritten_ids_response)
|
|
216
|
-
result = ClassifyThreads.run(sample_threads, context: { adapter: rewritten_adapter })
|
|
217
|
-
|
|
218
|
-
puts "=== REWRITTEN IDs (legacy code would silently fallback to positional matching) ==="
|
|
219
|
-
puts "Status: #{result.status}"
|
|
220
|
-
puts "Validation errors: #{result.validation_errors}"
|
|
@@ -1,203 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
# =============================================================================
|
|
4
|
-
# EXAMPLE 2: Promo Comment Generation
|
|
5
|
-
#
|
|
6
|
-
# Real-world case: Generate a Reddit comment that subtly promotes a product.
|
|
7
|
-
# The comment must match the thread's language, sound like a real user,
|
|
8
|
-
# include a product link naturally, and follow strict persona rules.
|
|
9
|
-
# =============================================================================
|
|
10
|
-
|
|
11
|
-
require_relative "../lib/ruby_llm/contract"
|
|
12
|
-
|
|
13
|
-
# =============================================================================
|
|
14
|
-
# BEFORE: Legacy approach (200+ lines across multiple concerns)
|
|
15
|
-
# =============================================================================
|
|
16
|
-
#
|
|
17
|
-
# In the legacy codebase, the prompt is assembled from 6+ helper methods
|
|
18
|
-
# across comment_prompts.rb (240 lines):
|
|
19
|
-
#
|
|
20
|
-
# ```ruby
|
|
21
|
-
# # System message built from 8 sections:
|
|
22
|
-
# def system_message_for_promo
|
|
23
|
-
# base_system_message(
|
|
24
|
-
# intro: "You write Reddit comments that subtly promote a product...",
|
|
25
|
-
# voice_lines: [
|
|
26
|
-
# "Sound like a genuine user who found something useful, not an ad.",
|
|
27
|
-
# 'Never say "I built" or "I made this".',
|
|
28
|
-
# "Sound like a real Reddit user: casual, no marketing speak...",
|
|
29
|
-
# # ... 10 more rules
|
|
30
|
-
# ],
|
|
31
|
-
# self_contained_lines: [...],
|
|
32
|
-
# extra_sections: [section("PROMO LINKING BASICS", [...])]
|
|
33
|
-
# )
|
|
34
|
-
# end
|
|
35
|
-
#
|
|
36
|
-
# # User prompt built by string concatenation:
|
|
37
|
-
# def build_promo_prompt(items, strict_language: false, comment_plan: nil)
|
|
38
|
-
# [
|
|
39
|
-
# <<~PRODUCT.strip,
|
|
40
|
-
# [PRODUCT]
|
|
41
|
-
# Domain: #{@url}
|
|
42
|
-
# PRODUCT
|
|
43
|
-
# pages_section,
|
|
44
|
-
# section("URL SELECTION", promo_url_selection_rules),
|
|
45
|
-
# section("PRODUCT MENTION", promo_product_mention_rules + [...]),
|
|
46
|
-
# comment_plan_section,
|
|
47
|
-
# comment_plan_rules,
|
|
48
|
-
# strict_lang_section,
|
|
49
|
-
# <<~ITEMS.strip
|
|
50
|
-
# [ITEMS]
|
|
51
|
-
# #{items.to_json}
|
|
52
|
-
# ITEMS
|
|
53
|
-
# ].compact.join("\n\n")
|
|
54
|
-
# end
|
|
55
|
-
# ```
|
|
56
|
-
#
|
|
57
|
-
# PROBLEMS:
|
|
58
|
-
# - 200+ lines of string building spread across 8+ methods
|
|
59
|
-
# - No validation on output — wrong language silently passes, caught later
|
|
60
|
-
# - Persona + voice + rules mixed with data (URL, pages, items)
|
|
61
|
-
# - Change one voice rule → no way to measure impact on output quality
|
|
62
|
-
# - The `.compact.join("\n\n")` pattern is fragile — easy to break structure
|
|
63
|
-
|
|
64
|
-
# =============================================================================
|
|
65
|
-
# AFTER: ruby_llm-contract approach
|
|
66
|
-
# =============================================================================
|
|
67
|
-
|
|
68
|
-
PERSONA = <<~PERSONA.strip
|
|
69
|
-
You are a woman, 40+, a maker. You solve your own problems by building \
|
|
70
|
-
software. Outside of code you crochet, sew, 3D-print, and do \
|
|
71
|
-
astrophotography with your son. You hunt deals both offline and online. \
|
|
72
|
-
Your writing style: reflective, inventive, casual. You make typos, skip \
|
|
73
|
-
Polish diacritics sometimes, write like a real person on a forum.
|
|
74
|
-
PERSONA
|
|
75
|
-
|
|
76
|
-
class GeneratePromoComment < RubyLLM::Contract::Step::Base
|
|
77
|
-
input_type RubyLLM::Contract::Types::Hash.schema(
|
|
78
|
-
thread_title: RubyLLM::Contract::Types::String,
|
|
79
|
-
thread_selftext: RubyLLM::Contract::Types::String,
|
|
80
|
-
subreddit: RubyLLM::Contract::Types::String,
|
|
81
|
-
target_length: RubyLLM::Contract::Types::Integer,
|
|
82
|
-
thread_language: RubyLLM::Contract::Types::String,
|
|
83
|
-
product_url: RubyLLM::Contract::Types::String,
|
|
84
|
-
matched_page_url: RubyLLM::Contract::Types::String
|
|
85
|
-
)
|
|
86
|
-
output_type Hash
|
|
87
|
-
|
|
88
|
-
prompt do
|
|
89
|
-
system "You write Reddit comments that subtly promote a product. Return valid JSON only."
|
|
90
|
-
|
|
91
|
-
section "PERSONA", PERSONA
|
|
92
|
-
|
|
93
|
-
rule "Sound like a genuine user who found something useful, not an ad."
|
|
94
|
-
rule 'Never say "I built" or "I made this".'
|
|
95
|
-
rule "Casual tone, no marketing speak, no emojis, no bullet points."
|
|
96
|
-
rule "Pick one specific angle and share it concretely."
|
|
97
|
-
rule "Be opinionated; say what worked for you, not generic balanced advice."
|
|
98
|
-
rule 'NEVER start with "Nice X", "Cool X", "Love this". Jump straight into your point.'
|
|
99
|
-
rule "Give 2-3 options; the product link should be ONE of them, not the whole point."
|
|
100
|
-
rule "The comment must stand without the link."
|
|
101
|
-
rule 'Do not introduce the link with "PS:", "btw:", or parenthetical asides.'
|
|
102
|
-
rule "No markdown headers or formatting. Plain text only."
|
|
103
|
-
rule "Write in {thread_language}."
|
|
104
|
-
rule "Approximately {target_length} characters (±20%)."
|
|
105
|
-
|
|
106
|
-
section "PRODUCT", "Domain: {product_url}\nPage: {matched_page_url}"
|
|
107
|
-
|
|
108
|
-
user "r/{subreddit}: {thread_title}\n\n{thread_selftext}\n\nWrite a helpful comment."
|
|
109
|
-
end
|
|
110
|
-
|
|
111
|
-
validate("comment must not be empty") do |o|
|
|
112
|
-
o[:comment].is_a?(String) && o[:comment].strip.length > 10
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
validate("no markdown headers") do |o|
|
|
116
|
-
!o[:comment].to_s.match?(/^\#{2,}\s/)
|
|
117
|
-
end
|
|
118
|
-
|
|
119
|
-
validate("no emojis") do |o|
|
|
120
|
-
!o[:comment].to_s.match?(/[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}]/)
|
|
121
|
-
end
|
|
122
|
-
|
|
123
|
-
validate("includes product link") do |o, input|
|
|
124
|
-
o[:comment].to_s.include?(input[:matched_page_url])
|
|
125
|
-
end
|
|
126
|
-
|
|
127
|
-
validate("length within ±30% of target") do |o, input|
|
|
128
|
-
len = o[:comment].to_s.length
|
|
129
|
-
target = input[:target_length]
|
|
130
|
-
len.between?((target * 0.7).to_i, (target * 1.3).to_i)
|
|
131
|
-
end
|
|
132
|
-
|
|
133
|
-
validate("does not start with banned openings") do |o|
|
|
134
|
-
banned = ["Nice ", "Cool ", "Love this", "Great ", "Totally agree"]
|
|
135
|
-
banned.none? { |b| o[:comment].to_s.start_with?(b) }
|
|
136
|
-
end
|
|
137
|
-
end
|
|
138
|
-
|
|
139
|
-
# =============================================================================
|
|
140
|
-
# DEMO: Run with test adapter
|
|
141
|
-
# =============================================================================
|
|
142
|
-
|
|
143
|
-
input = {
|
|
144
|
-
thread_title: "spent way too much on yarn this month lol",
|
|
145
|
-
thread_selftext: "Between Drops and the new Scheepjes line I'm broke. Anyone else track their spending?",
|
|
146
|
-
subreddit: "crochet",
|
|
147
|
-
target_length: 200,
|
|
148
|
-
thread_language: "en",
|
|
149
|
-
product_url: "https://deals.example.com",
|
|
150
|
-
matched_page_url: "https://deals.example.com/yarn-deals"
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
# Happy path — good comment
|
|
154
|
-
good_comment = {
|
|
155
|
-
comment: "Ugh same. I started tracking last year and the numbers were brutal. " \
|
|
156
|
-
"What helped — monthly yarn budget plus checking https://deals.example.com/yarn-deals " \
|
|
157
|
-
"before impulse buying. Ravelry destash groups too."
|
|
158
|
-
}.to_json
|
|
159
|
-
|
|
160
|
-
adapter = RubyLLM::Contract::Adapters::Test.new(response: good_comment)
|
|
161
|
-
result = GeneratePromoComment.run(input, context: { adapter: adapter })
|
|
162
|
-
|
|
163
|
-
puts "=== HAPPY PATH ==="
|
|
164
|
-
puts "Status: #{result.status}"
|
|
165
|
-
puts "Comment: #{result.parsed_output[:comment]}"
|
|
166
|
-
puts "Validation errors: #{result.validation_errors}"
|
|
167
|
-
puts
|
|
168
|
-
|
|
169
|
-
# Bad path — starts with banned opening
|
|
170
|
-
bad_comment = {
|
|
171
|
-
comment: "Nice question! I track my yarn spending with a spreadsheet and also check " \
|
|
172
|
-
"https://deals.example.com/yarn-deals for sales."
|
|
173
|
-
}.to_json
|
|
174
|
-
|
|
175
|
-
bad_adapter = RubyLLM::Contract::Adapters::Test.new(response: bad_comment)
|
|
176
|
-
result = GeneratePromoComment.run(input, context: { adapter: bad_adapter })
|
|
177
|
-
|
|
178
|
-
puts "=== BANNED OPENING ==="
|
|
179
|
-
puts "Status: #{result.status}"
|
|
180
|
-
puts "Validation errors: #{result.validation_errors}"
|
|
181
|
-
puts
|
|
182
|
-
|
|
183
|
-
# Bad path — missing product link
|
|
184
|
-
no_link_comment = {
|
|
185
|
-
comment: "Same here. I started a spreadsheet and realized I spent way more than I thought. " \
|
|
186
|
-
"Ravelry destash groups are great for cheap yarn though."
|
|
187
|
-
}.to_json
|
|
188
|
-
|
|
189
|
-
no_link_adapter = RubyLLM::Contract::Adapters::Test.new(response: no_link_comment)
|
|
190
|
-
result = GeneratePromoComment.run(input, context: { adapter: no_link_adapter })
|
|
191
|
-
|
|
192
|
-
puts "=== MISSING LINK ==="
|
|
193
|
-
puts "Status: #{result.status}"
|
|
194
|
-
puts "Validation errors: #{result.validation_errors}"
|
|
195
|
-
puts
|
|
196
|
-
|
|
197
|
-
# Inspect the rendered prompt AST
|
|
198
|
-
puts "=== RENDERED PROMPT (first 3 messages) ==="
|
|
199
|
-
adapter = RubyLLM::Contract::Adapters::Test.new(response: good_comment)
|
|
200
|
-
result = GeneratePromoComment.run(input, context: { adapter: adapter })
|
|
201
|
-
result.trace[:messages].first(3).each do |msg|
|
|
202
|
-
puts " [#{msg[:role]}] #{msg[:content][0..80]}..."
|
|
203
|
-
end
|