RubyGems - ruby_llm-tribunal - Versions diffs - 0.1.0 → 0.1.1 - Mend

ruby_llm-tribunal 0.1.0 → 0.1.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml +4 -4
data/CHANGELOG.md +13 -1
data/lib/ruby_llm/tribunal/assertions/judge.rb +6 -1
data/lib/ruby_llm/tribunal/reporters/github.rb +2 -2
data/lib/ruby_llm/tribunal/version.rb +1 -1
data/lib/tasks/tribunal.rake +9 -3
metadata +6 -6

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 0cfe5bd072c4cc3499736cf095cdc4faea9778bb0feb368ac13c6735cd6239ce
-  data.tar.gz: 6730343af6bd441998357fdc5a56c13ba5a3b1e226877e0d77d704947fe84883
+  metadata.gz: 95f293e61287cbd2f94763540bf159667d3e02223e0c8950d0f5e46622e1be40
+  data.tar.gz: b0e0c398b24b7847d3e4defd7f2200733493621f44f65129a818e0bef6c08894
 SHA512:
-  metadata.gz: aacf8935874a75b51fcc3e6cd63b3d65d01b9f437bfc6b86a3fb496d50163b9fa122e615cbe3bd5177b8c05dd3305aadbac779385637203b1fc0e142099026a4
-  data.tar.gz: 44415c718a94108c0f7416054dd4e6f2ecc4ee05647272b176402ff0419ad23efb547f5dd7de088a31027003d0c2a32ce508bcb8bca3be77dd23d74f51e25b69
+  metadata.gz: 05ee643802ca6de43a4cf89bbb5e9a528030c663e69097fadd26a15543be3b44bfa520956a6f1033c8add68cc505c92043786febcb7fe2b7c77972580102c541
+  data.tar.gz: 231b545756fca7e53210c9bc4598cc10ae5da16e8a576a6d2801c59c11bb65a4378c252b10bd269369530de26ece1f51e923342bc512cf73ea4b2df667787310

data/CHANGELOG.md CHANGED Viewed

@@ -6,6 +6,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 ## [Unreleased]
+## [0.1.1] - 2026-01-16
+### Fixed
+- **Critical**: Fixed incorrect threshold logic for negative metrics (toxicity, hallucination, bias, etc.) with `partial` verdicts. High scores on negative metrics now correctly result in failures.
+- **Concurrency**: The `--concurrency` option now gracefully falls back to sequential execution when the `parallel` gem is not installed, with a helpful warning message.
+### Added
+- Tests for negative metric edge cases (partial verdicts with inverted threshold logic)
 ## [0.1.0] - 2026-01-15
 ### Added
@@ -28,5 +39,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Requires `ruby_llm` >= 1.0
 - Optional: `neighbor` gem for embedding-based similarity
-[Unreleased]: https://github.com/Alqemist-labs/ruby_llm-tribunal/compare/v0.1.0...HEAD
+[Unreleased]: https://github.com/Alqemist-labs/ruby_llm-tribunal/compare/v0.1.1...HEAD
+[0.1.1]: https://github.com/Alqemist-labs/ruby_llm-tribunal/compare/v0.1.0...v0.1.1
 [0.1.0]: https://github.com/Alqemist-labs/ruby_llm-tribunal/releases/tag/v0.1.0

data/lib/ruby_llm/tribunal/assertions/judge.rb CHANGED Viewed

@@ -140,7 +140,12 @@ module RubyLLM
             passed = case verdict
                      when 'yes' then !negative_metric
                      when 'no' then negative_metric
-                     when 'partial' then score.is_a?(Numeric) && score >= threshold
+                     when 'partial'
+                       return [:fail, details] unless score.is_a?(Numeric)
+                       # For negative metrics (toxicity, hallucination, etc.), high score = bad
+                       # So we invert the comparison: pass if score is LOW enough
+                       negative_metric ? score <= (1.0 - threshold) : score >= threshold
                      end
             passed ? [:pass, details] : [:fail, details]

data/lib/ruby_llm/tribunal/reporters/github.rb CHANGED Viewed

@@ -10,8 +10,8 @@ module RubyLLM
             annotations = results[:cases]
                           .select { |c| c[:status] == :failed }
                           .map do |c|
-                            reasons = c[:failures].map { |type, reason| "#{type}: #{reason}" }.join('; ')
-                            "::error::#{c[:input]}: #{reasons}"
+              reasons = c[:failures].map { |type, reason| "#{type}: #{reason}" }.join('; ')
+              "::error::#{c[:input]}: #{reasons}"
             end
             summary = "::notice::Tribunal: #{results[:summary][:passed]}/#{results[:summary][:total]} passed " \

data/lib/ruby_llm/tribunal/version.rb CHANGED Viewed

@@ -2,6 +2,6 @@
 module RubyLLM
   module Tribunal
-    VERSION = '0.1.0'
+    VERSION = '0.1.1'
   end
 end

data/lib/tasks/tribunal.rake CHANGED Viewed

@@ -133,9 +133,15 @@ namespace :tribunal do
     cases = RubyLLM::Tribunal::Dataset.load_with_assertions(path)
     if concurrency > 1
-      require 'parallel'
-      Parallel.map(cases, in_threads: concurrency) do |test_case, assertions|
-        run_case(test_case, assertions, provider)
+      begin
+        require 'parallel'
+        Parallel.map(cases, in_threads: concurrency) do |test_case, assertions|
+          run_case(test_case, assertions, provider)
+        end
+      rescue LoadError
+        warn "Warning: 'parallel' gem not installed, falling back to sequential execution."
+        warn '  Install with: gem install parallel'
+        cases.map { |test_case, assertions| run_case(test_case, assertions, provider) }
       end
     else
       cases.map { |test_case, assertions| run_case(test_case, assertions, provider) }

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby_llm-tribunal
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.1
 platform: ruby
 authors:
 - Florian
-autorequire:
+autorequire:
 bindir: exe
 cert_chain: []
-date: 2026-01-15 00:00:00.000000000 Z
+date: 2026-01-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ruby_llm
@@ -77,7 +77,7 @@ metadata:
   source_code_uri: https://github.com/Alqemist-labs/ruby_llm-tribunal
   changelog_uri: https://github.com/Alqemist-labs/ruby_llm-tribunal/blob/main/CHANGELOG.md
   rubygems_mfa_required: 'true'
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -92,8 +92,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.0.3.1
-signing_key:
+rubygems_version: 3.4.19
+signing_key:
 specification_version: 4
 summary: LLM evaluation framework for Ruby
 test_files: []