RubyGems - rubric_llm - Versions diffs - 0.1.0 → 0.1.1 - Mend

rubric_llm 0.1.0 → 0.1.1

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml +4 -4
data/CHANGELOG.md +9 -0
data/lib/rubric_llm/comparison.rb +32 -18
data/lib/rubric_llm/judge.rb +2 -1
data/lib/rubric_llm/version.rb +1 -1
data/lib/rubric_llm.rb +9 -2
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: a549a52da8585cfbdf8287315548389de6e93b1b0adca9b8fb32fedde3d966e5
-  data.tar.gz: 3de825ff22b9c4b3dc091cc17ef089e504f53238857d30393b1951d55884c38d
+  metadata.gz: 52a6704521634882ccc1d1779c20e2b65587f398b2cfd00b3c7892d9a97706e1
+  data.tar.gz: 3410606f13f439af21457ac9714b0e67a33752a3231854ccd0f73e759d4ac209
 SHA512:
-  metadata.gz: 004aff76b7b92d2266d75ec54b897d240cef7a86f424cbca8d1904d7e16eeb1821900c722e6df4f20be511936f90993e73b4e3aebd436e9d82f24ee558f12996
-  data.tar.gz: eacc3c28cb3d3504323dbbb491c3e9b8af6625520308cb319cab2d5164868ac56a52c3daed5a603e496c092e53a2c1a7b1f6f32754b9dc1c230ec0aebca30be9
+  metadata.gz: 17a9e4d4308627889576b29fbbdb91a93a12e702847780e21fe070cb499ac06d243fa5e688d520e4fd43827eb05ba86b85eafb039598160e31e273838ab69f1c
+  data.tar.gz: 86b21112726b41c2eef29cc2e0fc0f6b6b4d742a6c2797d46f391d0292fb2ad8477d3792029d1b90f562425118f52df496e998f6d434128c4b4017756aaa217d

data/CHANGELOG.md CHANGED Viewed

@@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [0.1.1] - 2026-03-24
+### Fixed
+- Use RubyLLM system instructions instead of the attachment API when calling the judge
+- Roll back invalid global configuration changes when `RubricLLM.configure` validation fails
+- Accept string-keyed batch dataset hashes in sequential and concurrent evaluation
+- Stabilize Student's t-test p-value calculation for small deltas and ordinary sample sizes
 ## [0.1.0] - 2026-03-24
 ### Added

data/lib/rubric_llm/comparison.rb CHANGED Viewed

@@ -102,40 +102,54 @@ module RubricLLM
       return 0.0 if x <= 0.0
       return 1.0 if x >= 1.0
-      ln_beta = Math.lgamma(a)[0] + Math.lgamma(b)[0] - Math.lgamma(a + b)[0]
-      front = Math.exp((a * Math.log(x)) + (b * Math.log(1.0 - x)) - ln_beta) / a
+      ln_beta = Math.lgamma(a + b)[0] - Math.lgamma(a)[0] - Math.lgamma(b)[0]
+      front = Math.exp(ln_beta + (a * Math.log(x)) + (b * Math.log(1.0 - x)))
+      result = if x < ((a + 1.0) / (a + b + 2.0))
+                 front * beta_continued_fraction(a, b, x) / a
+               else
+                 1.0 - ((front * beta_continued_fraction(b, a, 1.0 - x)) / b)
+               end
+      result.clamp(0.0, 1.0)
+    end
+    def beta_continued_fraction(a, b, x)
+      tiny = 1e-30
+      qab = a + b
+      qap = a + 1.0
+      qam = a - 1.0
-      # Lentz's continued fraction
       c = 1.0
-      d = 1.0 - ((a + b) * x / (a + 1.0))
-      d = 1.0 if d.abs < 1e-30
+      d = 1.0 - ((qab * x) / qap)
+      d = tiny if d.abs < tiny
       d = 1.0 / d
-      f = d
+      fraction = d
       (1..200).each do |m|
-        # Even step
-        numerator = m * (b - m) * x / ((a + (2 * m) - 1) * (a + (2 * m)))
+        m2 = 2 * m
+        numerator = (m * (b - m) * x) / ((qam + m2) * (a + m2))
         d = 1.0 + (numerator * d)
-        d = 1e-30 if d.abs < 1e-30
+        d = tiny if d.abs < tiny
         c = 1.0 + (numerator / c)
-        c = 1e-30 if c.abs < 1e-30
+        c = tiny if c.abs < tiny
         d = 1.0 / d
-        f *= c * d
+        fraction *= c * d
-        # Odd step
-        numerator = -(a + m) * (a + b + m) * x / ((a + (2 * m)) * (a + (2 * m) + 1))
+        numerator = -((a + m) * (qab + m) * x) / ((a + m2) * (qap + m2))
         d = 1.0 + (numerator * d)
-        d = 1e-30 if d.abs < 1e-30
+        d = tiny if d.abs < tiny
         c = 1.0 + (numerator / c)
-        c = 1e-30 if c.abs < 1e-30
+        c = tiny if c.abs < tiny
         d = 1.0 / d
         delta = c * d
-        f *= delta
+        fraction *= delta
-        break if (delta - 1.0).abs < 1e-10
+        break if (delta - 1.0).abs < 1e-12
       end
-      front * f
+      fraction
     end
     def significance_marker(p)

data/lib/rubric_llm/judge.rb CHANGED Viewed

@@ -22,7 +22,8 @@ module RubricLLM
         chat.with_params(max_tokens: config.max_tokens)
         full_system_prompt = build_system_prompt(system_prompt)
-        response = chat.ask(user_prompt, with: full_system_prompt)
+        chat.with_instructions(full_system_prompt)
+        response = chat.ask(user_prompt)
         parse_json(response.content)
       rescue StandardError => e
         raise JudgeError, "Judge call failed: #{e.message}" if attempts > config.max_retries

data/lib/rubric_llm/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module RubricLLM
-  VERSION = "0.1.0"
+  VERSION = "0.1.1"
 end

data/lib/rubric_llm.rb CHANGED Viewed

@@ -26,8 +26,10 @@ module RubricLLM
     end
     def configure
-      yield(config)
-      config.validate!
+      new_config = Config.new(**config.to_h)
+      yield(new_config)
+      new_config.validate!
+      @config = new_config
     end
     def reset_configuration!
@@ -86,6 +88,7 @@ module RubricLLM
     private
     def evaluate_sample(evaluator, sample)
+      sample = normalize_sample(sample)
       evaluator.call(
         question: sample[:question],
         answer: sample[:answer],
@@ -122,5 +125,9 @@ module RubricLLM
       Config.new(**config.to_h.compact, custom_prompt:)
     end
+    def normalize_sample(sample)
+      sample.transform_keys(&:to_sym)
+    end
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: rubric_llm
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.1
 platform: ruby
 authors:
 - David Paluy
@@ -79,7 +79,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 4.0.6
+rubygems_version: 3.6.9
 specification_version: 4
 summary: Lightweight LLM evaluation framework for Ruby
 test_files: []