ruby_llm-tribunal 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0cfe5bd072c4cc3499736cf095cdc4faea9778bb0feb368ac13c6735cd6239ce
4
- data.tar.gz: 6730343af6bd441998357fdc5a56c13ba5a3b1e226877e0d77d704947fe84883
3
+ metadata.gz: 95f293e61287cbd2f94763540bf159667d3e02223e0c8950d0f5e46622e1be40
4
+ data.tar.gz: b0e0c398b24b7847d3e4defd7f2200733493621f44f65129a818e0bef6c08894
5
5
  SHA512:
6
- metadata.gz: aacf8935874a75b51fcc3e6cd63b3d65d01b9f437bfc6b86a3fb496d50163b9fa122e615cbe3bd5177b8c05dd3305aadbac779385637203b1fc0e142099026a4
7
- data.tar.gz: 44415c718a94108c0f7416054dd4e6f2ecc4ee05647272b176402ff0419ad23efb547f5dd7de088a31027003d0c2a32ce508bcb8bca3be77dd23d74f51e25b69
6
+ metadata.gz: 05ee643802ca6de43a4cf89bbb5e9a528030c663e69097fadd26a15543be3b44bfa520956a6f1033c8add68cc505c92043786febcb7fe2b7c77972580102c541
7
+ data.tar.gz: 231b545756fca7e53210c9bc4598cc10ae5da16e8a576a6d2801c59c11bb65a4378c252b10bd269369530de26ece1f51e923342bc512cf73ea4b2df667787310
data/CHANGELOG.md CHANGED
@@ -6,6 +6,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.1.1] - 2026-01-16
10
+
11
+ ### Fixed
12
+
13
+ - **Critical**: Fixed incorrect threshold logic for negative metrics (toxicity, hallucination, bias, etc.) with `partial` verdicts. High scores on negative metrics now correctly result in failures.
14
+ - **Concurrency**: The `--concurrency` option now gracefully falls back to sequential execution when the `parallel` gem is not installed, with a helpful warning message.
15
+
16
+ ### Added
17
+
18
+ - Tests for negative metric edge cases (partial verdicts with inverted threshold logic)
19
+
9
20
  ## [0.1.0] - 2026-01-15
10
21
 
11
22
  ### Added
@@ -28,5 +39,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
28
39
  - Requires `ruby_llm` >= 1.0
29
40
  - Optional: `neighbor` gem for embedding-based similarity
30
41
 
31
- [Unreleased]: https://github.com/Alqemist-labs/ruby_llm-tribunal/compare/v0.1.0...HEAD
42
+ [Unreleased]: https://github.com/Alqemist-labs/ruby_llm-tribunal/compare/v0.1.1...HEAD
43
+ [0.1.1]: https://github.com/Alqemist-labs/ruby_llm-tribunal/compare/v0.1.0...v0.1.1
32
44
  [0.1.0]: https://github.com/Alqemist-labs/ruby_llm-tribunal/releases/tag/v0.1.0
@@ -140,7 +140,12 @@ module RubyLLM
140
140
  passed = case verdict
141
141
  when 'yes' then !negative_metric
142
142
  when 'no' then negative_metric
143
- when 'partial' then score.is_a?(Numeric) && score >= threshold
143
+ when 'partial'
144
+ return [:fail, details] unless score.is_a?(Numeric)
145
+
146
+ # For negative metrics (toxicity, hallucination, etc.), high score = bad
147
+ # So we invert the comparison: pass if score is LOW enough
148
+ negative_metric ? score <= (1.0 - threshold) : score >= threshold
144
149
  end
145
150
 
146
151
  passed ? [:pass, details] : [:fail, details]
@@ -10,8 +10,8 @@ module RubyLLM
10
10
  annotations = results[:cases]
11
11
  .select { |c| c[:status] == :failed }
12
12
  .map do |c|
13
- reasons = c[:failures].map { |type, reason| "#{type}: #{reason}" }.join('; ')
14
- "::error::#{c[:input]}: #{reasons}"
13
+ reasons = c[:failures].map { |type, reason| "#{type}: #{reason}" }.join('; ')
14
+ "::error::#{c[:input]}: #{reasons}"
15
15
  end
16
16
 
17
17
  summary = "::notice::Tribunal: #{results[:summary][:passed]}/#{results[:summary][:total]} passed " \
@@ -2,6 +2,6 @@
2
2
 
3
3
  module RubyLLM
4
4
  module Tribunal
5
- VERSION = '0.1.0'
5
+ VERSION = '0.1.1'
6
6
  end
7
7
  end
@@ -133,9 +133,15 @@ namespace :tribunal do
133
133
  cases = RubyLLM::Tribunal::Dataset.load_with_assertions(path)
134
134
 
135
135
  if concurrency > 1
136
- require 'parallel'
137
- Parallel.map(cases, in_threads: concurrency) do |test_case, assertions|
138
- run_case(test_case, assertions, provider)
136
+ begin
137
+ require 'parallel'
138
+ Parallel.map(cases, in_threads: concurrency) do |test_case, assertions|
139
+ run_case(test_case, assertions, provider)
140
+ end
141
+ rescue LoadError
142
+ warn "Warning: 'parallel' gem not installed, falling back to sequential execution."
143
+ warn ' Install with: gem install parallel'
144
+ cases.map { |test_case, assertions| run_case(test_case, assertions, provider) }
139
145
  end
140
146
  else
141
147
  cases.map { |test_case, assertions| run_case(test_case, assertions, provider) }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby_llm-tribunal
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-01-15 00:00:00.000000000 Z
11
+ date: 2026-01-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby_llm
@@ -77,7 +77,7 @@ metadata:
77
77
  source_code_uri: https://github.com/Alqemist-labs/ruby_llm-tribunal
78
78
  changelog_uri: https://github.com/Alqemist-labs/ruby_llm-tribunal/blob/main/CHANGELOG.md
79
79
  rubygems_mfa_required: 'true'
80
- post_install_message:
80
+ post_install_message:
81
81
  rdoc_options: []
82
82
  require_paths:
83
83
  - lib
@@ -92,8 +92,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
92
92
  - !ruby/object:Gem::Version
93
93
  version: '0'
94
94
  requirements: []
95
- rubygems_version: 3.0.3.1
96
- signing_key:
95
+ rubygems_version: 3.4.19
96
+ signing_key:
97
97
  specification_version: 4
98
98
  summary: LLM evaluation framework for Ruby
99
99
  test_files: []