ruby_llm-contract 0.3.6 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Gemfile.lock +2 -2
- data/README.md +1 -1
- data/lib/ruby_llm/contract/eval/baseline_diff.rb +10 -2
- data/lib/ruby_llm/contract/eval/eval_definition.rb +2 -3
- data/lib/ruby_llm/contract/eval/trait_evaluator.rb +5 -2
- data/lib/ruby_llm/contract/prompt/builder.rb +5 -3
- data/lib/ruby_llm/contract/step/base.rb +2 -1
- data/lib/ruby_llm/contract/version.rb +1 -1
- data/lib/ruby_llm/contract.rb +4 -1
- data/ruby_llm-contract.gemspec +5 -3
- metadata +6 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: dee963c252704634b8b9452e4e0460561e7795385e2dc59f4d5cc089a16d9210
|
|
4
|
+
data.tar.gz: ce289e0f1dee22a75d7079b28775c6dd0e5d85b01a54e5a97e4f47b40c2f5741
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d10ff4021462051d80cb5205174a24f9c5093ee096fc5add7d5bfacc88fb936a364d474871c05d87dad404ffc9577c998e7a1ae73cc8a8e0a5868e7cef629c83
|
|
7
|
+
data.tar.gz: 914a370baf65d5e8fc62f78a22e3bc6ee9eba83b78257ac95b87c8d5965ae23e54dbb7a66de7b2b6c7dc3c848a513be22c2e37e76445d9094dd576f3d3867215
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.3.7 (2026-03-24)
|
|
4
|
+
|
|
5
|
+
- **Trait missing key = error** — `expected_traits: { title: 0..5 }` on output `{}` now fails instead of silently passing.
|
|
6
|
+
- **nil input in dynamic prompts** — `run(nil)` with `prompt { |input| ... }` correctly passes nil to block.
|
|
7
|
+
- **Defensive sample pre-validation** — `sample_response` uses the same parser as runtime (handles code fences, BOM, prose around JSON).
|
|
8
|
+
- **Baseline diff excludes skipped** — self-compare with skipped cases no longer shows artificial score delta.
|
|
9
|
+
- **Zeitwerk eval/ ignore** — `eager_load_contract_dirs!` ignores `eval/` subdirs before eager load.
|
|
10
|
+
|
|
3
11
|
## 0.3.6 (2026-03-24)
|
|
4
12
|
|
|
5
13
|
- **Recursive array/object validation** — nested arrays (`array of array of string`) validated recursively. Object items validated even without `:properties` (e.g. `additionalProperties: false`).
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
ruby_llm-contract (0.3.6)
|
|
4
|
+
ruby_llm-contract (0.3.7)
|
|
5
5
|
dry-types (~> 1.7)
|
|
6
6
|
ruby_llm (~> 1.0)
|
|
7
7
|
ruby_llm-schema (~> 0.3)
|
|
@@ -165,7 +165,7 @@ CHECKSUMS
|
|
|
165
165
|
rubocop-ast (1.49.1) sha256=4412f3ee70f6fe4546cc489548e0f6fcf76cafcfa80fa03af67098ffed755035
|
|
166
166
|
ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
|
|
167
167
|
ruby_llm (1.14.0) sha256=57c6f7034fc4a44504ea137d70f853b07824f1c1cdbe774ab3ab3522e7098deb
|
|
168
|
-
ruby_llm-contract (0.3.6)
|
|
168
|
+
ruby_llm-contract (0.3.7)
|
|
169
169
|
ruby_llm-schema (0.3.0) sha256=a591edc5ca1b7f0304f0e2261de61ba4b3bea17be09f5cf7558153adfda3dec6
|
|
170
170
|
unicode-display_width (3.2.0) sha256=0cdd96b5681a5949cdbc2c55e7b420facae74c4aaf9a9815eee1087cb1853c42
|
|
171
171
|
unicode-emoji (4.2.0) sha256=519e69150f75652e40bf736106cfbc8f0f73aa3fb6a65afe62fefa7f80b0f80f
|
data/README.md
CHANGED
|
@@ -6,7 +6,7 @@ Companion gem for [ruby_llm](https://github.com/crmne/ruby_llm).
|
|
|
6
6
|
|
|
7
7
|
## The problem
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
Which model should you use? The expensive one is accurate but costs 4x more. The cheap one is fast but hallucinates on edge cases. You tweak a prompt — did accuracy improve or drop? You have no data. Just gut feeling.
|
|
10
10
|
|
|
11
11
|
## The fix
|
|
12
12
|
|
|
@@ -9,8 +9,8 @@ module RubyLLM
|
|
|
9
9
|
def initialize(baseline_cases:, current_cases:)
|
|
10
10
|
@baseline = index_by_name(baseline_cases)
|
|
11
11
|
@current = index_by_name(current_cases)
|
|
12
|
-
@baseline_score = baseline_cases
|
|
13
|
-
@current_score = current_cases
|
|
12
|
+
@baseline_score = compute_score(baseline_cases)
|
|
13
|
+
@current_score = compute_score(current_cases)
|
|
14
14
|
freeze
|
|
15
15
|
end
|
|
16
16
|
|
|
@@ -78,6 +78,14 @@ module RubyLLM
|
|
|
78
78
|
|
|
79
79
|
private
|
|
80
80
|
|
|
81
|
+
def compute_score(cases)
|
|
82
|
+
# Exclude skipped cases from score (consistent with Report#score)
|
|
83
|
+
evaluated = cases.reject { |c| c[:details]&.start_with?("skipped:") }
|
|
84
|
+
return 0.0 if evaluated.empty?
|
|
85
|
+
|
|
86
|
+
evaluated.sum { |c| c[:score] } / evaluated.length
|
|
87
|
+
end
|
|
88
|
+
|
|
81
89
|
def index_by_name(cases)
|
|
82
90
|
cases.each_with_object({}) { |c, h| h[c[:name]] = c }
|
|
83
91
|
end
|
|
@@ -106,15 +106,14 @@ module RubyLLM
|
|
|
106
106
|
return if errors.empty?
|
|
107
107
|
|
|
108
108
|
raise ArgumentError, "sample_response does not satisfy step schema: #{errors.join(", ")}"
|
|
109
|
-
rescue JSON::ParserError => e
|
|
110
|
-
# Non-JSON string with a structured schema = clear error
|
|
109
|
+
rescue JSON::ParserError, RubyLLM::Contract::ParseError => e
|
|
111
110
|
raise ArgumentError, "sample_response is not valid JSON: #{e.message}"
|
|
112
111
|
end
|
|
113
112
|
|
|
114
113
|
def validate_sample_against_schema(schema)
|
|
115
114
|
parsed = case @sample_response
|
|
116
115
|
when Hash, Array then @sample_response
|
|
117
|
-
when String then
|
|
116
|
+
when String then Parser.parse(@sample_response, strategy: :json)
|
|
118
117
|
else @sample_response
|
|
119
118
|
end
|
|
120
119
|
symbolized = deep_symbolize(parsed)
|
|
@@ -19,8 +19,11 @@ module RubyLLM
|
|
|
19
19
|
end
|
|
20
20
|
|
|
21
21
|
def check_trait(output, key, expectation, errors)
|
|
22
|
-
|
|
23
|
-
|
|
22
|
+
unless output.is_a?(Hash) && output.key?(key)
|
|
23
|
+
errors << "#{key}: missing key"
|
|
24
|
+
return
|
|
25
|
+
end
|
|
26
|
+
error_msg = trait_error(key, output[key], expectation)
|
|
24
27
|
errors << error_msg if error_msg
|
|
25
28
|
end
|
|
26
29
|
|
|
@@ -4,14 +4,16 @@ module RubyLLM
|
|
|
4
4
|
module Contract
|
|
5
5
|
module Prompt
|
|
6
6
|
class Builder
|
|
7
|
+
NOT_PROVIDED = Object.new.freeze
|
|
8
|
+
|
|
7
9
|
def initialize(block)
|
|
8
10
|
@block = block
|
|
9
11
|
@nodes = []
|
|
10
12
|
end
|
|
11
13
|
|
|
12
|
-
def build(input =
|
|
14
|
+
def build(input = NOT_PROVIDED)
|
|
13
15
|
@nodes = []
|
|
14
|
-
if
|
|
16
|
+
if input != NOT_PROVIDED && @block.arity >= 1
|
|
15
17
|
instance_exec(input, &@block)
|
|
16
18
|
else
|
|
17
19
|
instance_eval(&@block)
|
|
@@ -39,7 +41,7 @@ module RubyLLM
|
|
|
39
41
|
@nodes << Nodes::SectionNode.new(name, text)
|
|
40
42
|
end
|
|
41
43
|
|
|
42
|
-
def self.build(input:
|
|
44
|
+
def self.build(input: NOT_PROVIDED, &block)
|
|
43
45
|
new(block).build(input)
|
|
44
46
|
end
|
|
45
47
|
end
|
|
@@ -82,7 +82,8 @@ module RubyLLM
|
|
|
82
82
|
|
|
83
83
|
def build_messages(input)
|
|
84
84
|
dynamic = prompt.arity >= 1
|
|
85
|
-
|
|
85
|
+
builder_input = dynamic ? input : Prompt::Builder::NOT_PROVIDED
|
|
86
|
+
ast = Prompt::Builder.build(input: builder_input, &prompt)
|
|
86
87
|
variables = dynamic ? {} : { input: input }
|
|
87
88
|
variables.merge!(input.transform_keys(&:to_sym)) if !dynamic && input.is_a?(Hash)
|
|
88
89
|
Prompt::Renderer.render(ast, variables: variables)
|
data/lib/ruby_llm/contract.rb
CHANGED
|
@@ -88,9 +88,12 @@ module RubyLLM
|
|
|
88
88
|
full = ::Rails.root.join(path)
|
|
89
89
|
next unless full.exist?
|
|
90
90
|
|
|
91
|
+
# Ignore eval/ subdirs — they don't define Zeitwerk-compatible
|
|
92
|
+
# constants and are loaded separately by load_evals!
|
|
93
|
+
eval_dir = full.join("eval")
|
|
94
|
+
::Rails.autoloaders.main.ignore(eval_dir.to_s) if eval_dir.exist?
|
|
91
95
|
::Rails.autoloaders.main.eager_load_dir(full.to_s)
|
|
92
96
|
rescue StandardError
|
|
93
|
-
# Zeitwerk not available or dir not managed — skip
|
|
94
97
|
nil
|
|
95
98
|
end
|
|
96
99
|
end
|
data/ruby_llm-contract.gemspec
CHANGED
|
@@ -7,9 +7,10 @@ Gem::Specification.new do |spec|
|
|
|
7
7
|
spec.version = RubyLLM::Contract::VERSION
|
|
8
8
|
spec.authors = ["Justyna"]
|
|
9
9
|
|
|
10
|
-
spec.summary = "
|
|
11
|
-
spec.description = "
|
|
12
|
-
"
|
|
10
|
+
spec.summary = "Know which LLM model to use, what it costs, and when accuracy drops"
|
|
11
|
+
spec.description = "Compare LLM models by accuracy and cost. Regression-test prompts in CI. " \
|
|
12
|
+
"Start on nano, auto-escalate to bigger models when quality drops. " \
|
|
13
|
+
"Companion gem for ruby_llm."
|
|
13
14
|
spec.homepage = "https://github.com/justi/ruby_llm-contract"
|
|
14
15
|
spec.license = "MIT"
|
|
15
16
|
spec.required_ruby_version = ">= 3.2.0"
|
|
@@ -17,6 +18,7 @@ Gem::Specification.new do |spec|
|
|
|
17
18
|
spec.metadata["homepage_uri"] = spec.homepage
|
|
18
19
|
spec.metadata["source_code_uri"] = spec.homepage
|
|
19
20
|
spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/main/CHANGELOG.md"
|
|
21
|
+
spec.metadata["documentation_uri"] = "#{spec.homepage}#readme"
|
|
20
22
|
spec.metadata["rubygems_mfa_required"] = "true"
|
|
21
23
|
|
|
22
24
|
spec.files = Dir.chdir(__dir__) do
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ruby_llm-contract
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.6
|
|
4
|
+
version: 0.3.7
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Justyna
|
|
@@ -51,8 +51,9 @@ dependencies:
|
|
|
51
51
|
- - "~>"
|
|
52
52
|
- !ruby/object:Gem::Version
|
|
53
53
|
version: '0.3'
|
|
54
|
-
description:
|
|
55
|
-
|
|
54
|
+
description: Compare LLM models by accuracy and cost. Regression-test prompts in CI.
|
|
55
|
+
Start on nano, auto-escalate to bigger models when quality drops. Companion gem
|
|
56
|
+
for ruby_llm.
|
|
56
57
|
executables: []
|
|
57
58
|
extensions: []
|
|
58
59
|
extra_rdoc_files: []
|
|
@@ -154,6 +155,7 @@ metadata:
|
|
|
154
155
|
homepage_uri: https://github.com/justi/ruby_llm-contract
|
|
155
156
|
source_code_uri: https://github.com/justi/ruby_llm-contract
|
|
156
157
|
changelog_uri: https://github.com/justi/ruby_llm-contract/blob/main/CHANGELOG.md
|
|
158
|
+
documentation_uri: https://github.com/justi/ruby_llm-contract#readme
|
|
157
159
|
rubygems_mfa_required: 'true'
|
|
158
160
|
rdoc_options: []
|
|
159
161
|
require_paths:
|
|
@@ -171,5 +173,5 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
171
173
|
requirements: []
|
|
172
174
|
rubygems_version: 3.6.7
|
|
173
175
|
specification_version: 4
|
|
174
|
-
summary:
|
|
176
|
+
summary: Know which LLM model to use, what it costs, and when accuracy drops
|
|
175
177
|
test_files: []
|