lex-eval 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1dd068d711cd3cc0c70d64f8c066e1bb03e929bc034073600a8e3946c7c65a77
4
- data.tar.gz: 6103505a44655acc55a78ac3677b2d8fef300e395d33acb47e5a545cd0f7e8e3
3
+ metadata.gz: 7986f7010e32abbdf158a8525f35806aee77c3b5e160e0933f436c92a1c7c7ec
4
+ data.tar.gz: c27e8719b565902494b89ec579cdcd3cf365dd6a223a5dcb7f4089601b35552f
5
5
  SHA512:
6
- metadata.gz: 4b0ef19e8406c5eaf2914b22aaef87913a775a19b89b21f35fc2b9cfbfdb3f135013e027c3eefd854963700cd91ef08be0c1f3976cf597f1c2f358430a4cb565
7
- data.tar.gz: 543c853757732ced23ebdbf4d5caa1ef09a91ca9ae4f20b36d75d32cb383d5d56c50548ea873199fcb2324a44dd77ee9c8ac8efe51568a4c94d421b5e45e53d9
6
+ metadata.gz: f75b481668bc142794f1f683c1e8eac55d6b26e333c5a7ecdd91afe748a454d5dbc47e81b99c61cc8c910421f84d1789d8ba38370ac1c4867a98ce4bfea1582d
7
+ data.tar.gz: afd865e08ba92c6bf4f231ddf8025af16a4e3fd623e6a0dce2d5e49975020b38c316b36d976e5a3e7875ca4d3b66d0bfdf4c7d27fff218527b20ebeed5f235c4
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/extensions/actors/subscription' unless defined?(Legion::Extensions::Actors::Subscription)
4
+
5
module Legion
  module Extensions
    module Eval
      module Actor
        # Subscription actor for online evaluation.
        #
        # NOTE(review): the hook methods below (runner_class, runner_function,
        # check_subtask?, generate_task?, use_runner?) and the EXCHANGE / QUEUE
        # constants are presumably consumed by the Subscription base class, which
        # is not visible here; confirm against that class.
        class Online < Legion::Extensions::Actors::Subscription
          EXCHANGE = 'llm.response'
          QUEUE = 'eval.online'

          # @return [Module] the runner module that provides +evaluate_response+
          def runner_class
            Legion::Extensions::Eval::Runners::Online
          end

          # @return [String] name of the runner method to invoke
          def runner_function
            'evaluate_response'
          end

          # @return [false] always
          def check_subtask?
            false
          end

          # @return [false] always
          def generate_task?
            false
          end

          # @return [false] always
          def use_runner?
            false
          end

          # Reports whether this actor should be active.
          #
          # The actor is off when Legion::Transport or the Online runner is not
          # loaded, and otherwise follows the settings switch. Any StandardError
          # raised while checking is treated as "disabled".
          #
          # @return [Boolean]
          def enabled?
            prerequisites_loaded = defined?(Legion::Transport) &&
                                   defined?(Legion::Extensions::Eval::Runners::Online)
            prerequisites_loaded ? online_enabled? : false
          rescue StandardError
            false
          end

          # Reads the eval.online.enabled setting. Only an explicit +false+
          # disables the actor; an absent setting (or no Legion::Settings at all)
          # leaves it enabled.
          #
          # @return [Boolean]
          private def online_enabled?
            if defined?(Legion::Settings)
              switch = Legion::Settings.dig(:eval, :online, :enabled)
              switch != false
            else
              true
            end
          end
        end
      end
    end
  end
end
@@ -7,6 +7,7 @@ module Legion
7
7
  include Runners::Evaluation
8
8
  include Runners::Annotation
9
9
  include Runners::AgenticReview
10
+ include Runners::Online
10
11
 
11
12
  def initialize(db: nil, **opts)
12
13
  @db = db
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Eval
      module Runners
        # Runner mixin that scores a single LLM response with one or more evaluators.
        #
        # The including object must also respond to +run_evaluation+, which this
        # module calls but does not define.
        module Online
          # Evaluates one response, subject to random sampling.
          #
          # @param response [Hash] read via +response[:input]+ and +response[:output]+;
          #   a missing value is replaced by an empty string
          # @param evaluators [Array<String, Symbol>, String, Symbol, nil] evaluator
          #   name(s); nil falls back to the configured list
          # @param sample_rate [Numeric, nil] probability (0.0..1.0) that the response
          #   is evaluated; nil falls back to the configured rate
          # @return [Hash] one of:
          #   - +{ evaluated: true, scores: { name => score_or_nil }, sampled: true }+
          #   - +{ evaluated: false, reason: :sampled_out, sampled: false }+
          #   - +{ evaluated: false, reason: :error, error: message, sampled: true }+
          def evaluate_response(response:, evaluators: nil, sample_rate: nil, **)
            # Array() lets a caller (or the settings) supply a single evaluator name.
            evaluator_names = Array(evaluators || configured_evaluators)
            # sample_rate defaults to nil rather than 1.0 so that the configured
            # rate is actually consulted when the caller does not pass one.
            effective_rate = sample_rate || configured_sample_rate

            # Kernel#rand yields a float in [0, 1); the strict comparison makes a
            # rate of 0.0 mean "never" and a rate of 1.0 mean "always".
            return { evaluated: false, reason: :sampled_out, sampled: false } unless rand < effective_rate

            scores = evaluator_names.each_with_object({}) do |name, collected|
              collected[name.to_sym] = run_single_evaluator(name, response)
            end

            { evaluated: true, scores: scores, sampled: true }
          rescue StandardError => e
            Legion::Logging.warn("lex-eval online: evaluate_response failed: #{e.message}") if defined?(Legion::Logging)
            { evaluated: false, reason: :error, error: e.message, sampled: true }
          end

          private

          # Runs one evaluator against the response and returns its average score.
          # A failing evaluator is logged (when Legion::Logging is loaded) and
          # yields nil, so the remaining evaluators still run.
          #
          # @param name [String, Symbol] evaluator / template name
          # @param response [Hash] see #evaluate_response
          # @return [Object, nil] +result.dig(:summary, :avg_score)+, or nil on error
          def run_single_evaluator(name, response)
            loader = Helpers::TemplateLoader.new
            config = loader.load_template(name.to_s) || {}
            result = run_evaluation(
              evaluator_name: name,
              evaluator_config: config,
              inputs: [{ input: response[:input] || '', output: response[:output] || '' }]
            )
            result.dig(:summary, :avg_score)
          rescue StandardError => e
            Legion::Logging.warn("lex-eval online: evaluator #{name} failed: #{e.message}") if defined?(Legion::Logging)
            nil
          end

          # @return [Array<String>] the eval.online.evaluators setting, or
          #   +["toxicity"]+ when it is unset or Legion::Settings is not loaded
          def configured_evaluators
            return %w[toxicity] unless defined?(Legion::Settings)

            Legion::Settings.dig(:eval, :online, :evaluators) || %w[toxicity]
          end

          # @return [Numeric] the eval.online.sample_rate setting, or 1.0 when it
          #   is unset or Legion::Settings is not loaded
          def configured_sample_rate
            return 1.0 unless defined?(Legion::Settings)

            Legion::Settings.dig(:eval, :online, :sample_rate) || 1.0
          end
        end
      end
    end
  end
end
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Eval
6
- VERSION = '0.2.1'
6
+ VERSION = '0.2.2'
7
7
  end
8
8
  end
9
9
  end
@@ -10,6 +10,7 @@ require_relative 'eval/helpers/guardrails'
10
10
  require_relative 'eval/runners/evaluation'
11
11
  require_relative 'eval/runners/annotation'
12
12
  require_relative 'eval/runners/agentic_review'
13
+ require_relative 'eval/runners/online'
13
14
  require_relative 'eval/client'
14
15
 
15
16
  module Legion
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-eval
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matthew Iverson
@@ -19,6 +19,7 @@ extra_rdoc_files: []
19
19
  files:
20
20
  - README.md
21
21
  - lib/legion/extensions/eval.rb
22
+ - lib/legion/extensions/eval/actors/online.rb
22
23
  - lib/legion/extensions/eval/client.rb
23
24
  - lib/legion/extensions/eval/evaluators/base.rb
24
25
  - lib/legion/extensions/eval/evaluators/code_evaluator.rb
@@ -32,6 +33,7 @@ files:
32
33
  - lib/legion/extensions/eval/runners/agentic_review.rb
33
34
  - lib/legion/extensions/eval/runners/annotation.rb
34
35
  - lib/legion/extensions/eval/runners/evaluation.rb
36
+ - lib/legion/extensions/eval/runners/online.rb
35
37
  - lib/legion/extensions/eval/templates/code_generation.yml
36
38
  - lib/legion/extensions/eval/templates/code_readability.yml
37
39
  - lib/legion/extensions/eval/templates/faithfulness.yml