lex-eval 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7986f7010e32abbdf158a8525f35806aee77c3b5e160e0933f436c92a1c7c7ec
|
|
4
|
+
data.tar.gz: c27e8719b565902494b89ec579cdcd3cf365dd6a223a5dcb7f4089601b35552f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f75b481668bc142794f1f683c1e8eac55d6b26e333c5a7ecdd91afe748a454d5dbc47e81b99c61cc8c910421f84d1789d8ba38370ac1c4867a98ce4bfea1582d
|
|
7
|
+
data.tar.gz: afd865e08ba92c6bf4f231ddf8025af16a4e3fd623e6a0dce2d5e49975020b38c316b36d976e5a3e7875ca4d3b66d0bfdf4c7d27fff218527b20ebeed5f235c4
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/extensions/actors/subscription' unless defined?(Legion::Extensions::Actors::Subscription)
|
|
4
|
+
|
|
5
|
+
module Legion
|
|
6
|
+
module Extensions
|
|
7
|
+
module Eval
|
|
8
|
+
module Actor
|
|
9
|
+
# Subscription actor wiring for online evaluation.
#
# NOTE(review): EXCHANGE and QUEUE are presumably read by the Subscription
# base class to bind this actor to its message source -- confirm there.
class Online < Legion::Extensions::Actors::Subscription
  EXCHANGE = 'llm.response'
  QUEUE = 'eval.online'

  # @return [Module] the runner module that owns the handler method
  def runner_class
    Legion::Extensions::Eval::Runners::Online
  end

  # @return [String] name of the runner method to call
  def runner_function
    'evaluate_response'
  end

  # The three flags below are hard-wired off for this actor.
  # NOTE(review): their exact meaning is defined by the base class, which is
  # not visible here.

  # @return [false]
  def check_subtask?
    false
  end

  # @return [false]
  def generate_task?
    false
  end

  # @return [false]
  def use_runner?
    false
  end

  # Reports whether this actor should be active.
  #
  # Requires both the transport layer and the online runner to be loaded;
  # only then is the settings toggle consulted. Any StandardError raised
  # along the way is treated as "disabled".
  #
  # @return [Boolean]
  def enabled?
    if defined?(Legion::Transport) && defined?(Legion::Extensions::Eval::Runners::Online)
      online_enabled?
    else
      false
    end
  rescue StandardError
    false
  end

  private

  # Settings toggle: on unless `eval.online.enabled` is explicitly false.
  # When Legion::Settings is not loaded at all the actor counts as enabled.
  #
  # @return [Boolean]
  def online_enabled?
    !defined?(Legion::Settings) || Legion::Settings.dig(:eval, :online, :enabled) != false
  end
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Eval
|
|
6
|
+
module Runners
|
|
7
|
+
# Online evaluation runner: samples incoming responses and scores each sampled
# response with one or more named evaluators.
#
# NOTE(review): `run_evaluation` and `Helpers::TemplateLoader` are not defined
# in this module; presumably the including object also provides the
# Evaluation runner -- confirm against the extension entry point.
module Online
  # Scores a single response with every requested evaluator.
  #
  # @param response [Hash] read via +response[:input]+ and +response[:output]+
  # @param evaluators [Array<String, Symbol>, String, Symbol, nil] evaluator
  #   name(s); nil falls back to the configured list
  # @param sample_rate [Numeric, nil] probability that the response is
  #   evaluated; nil falls back to the configured rate (1.0 when unset)
  # @return [Hash] one of
  #   - +{ evaluated: true, scores: { name => score_or_nil }, sampled: true }+
  #   - +{ evaluated: false, reason: :sampled_out, sampled: false }+
  #   - +{ evaluated: false, reason: :error, error: message, sampled: true }+
  def evaluate_response(response:, evaluators: nil, sample_rate: nil, **)
    # Array() lets a single evaluator name (e.g. a scalar from settings) work
    # the same as a one-element list.
    evaluator_names = Array(evaluators || configured_evaluators)
    # The default must be nil: a non-nil default would make the configured
    # rate unreachable for callers that omit the argument.
    effective_rate = sample_rate || configured_sample_rate

    # Kernel#rand yields a float in [0, 1). The strict comparison keeps a rate
    # of 0.0 from ever sampling while a rate of 1.0 still always samples.
    return { evaluated: false, reason: :sampled_out, sampled: false } unless rand < effective_rate

    scores = evaluator_names.each_with_object({}) do |name, collected|
      collected[name.to_sym] = run_single_evaluator(name, response)
    end

    { evaluated: true, scores: scores, sampled: true }
  rescue StandardError => e
    Legion::Logging.warn("lex-eval online: evaluate_response failed: #{e.message}") if defined?(Legion::Logging)
    { evaluated: false, reason: :error, error: e.message, sampled: true }
  end

  private

  # Runs one evaluator against the response as a single input/output pair.
  #
  # Failures are logged (when Legion::Logging is loaded) and reported as nil
  # so that one broken evaluator does not discard the other scores.
  #
  # @param name [String, Symbol] evaluator / template name
  # @param response [Hash] missing :input / :output default to ''
  # @return [Object, nil] the +:avg_score+ from the result summary, or nil
  def run_single_evaluator(name, response)
    loader = Helpers::TemplateLoader.new
    config = loader.load_template(name.to_s) || {}
    result = run_evaluation(
      evaluator_name:   name,
      evaluator_config: config,
      inputs:           [{ input: response[:input] || '', output: response[:output] || '' }]
    )
    result.dig(:summary, :avg_score)
  rescue StandardError => e
    Legion::Logging.warn("lex-eval online: evaluator #{name} failed: #{e.message}") if defined?(Legion::Logging)
    nil
  end

  # @return [Object] `eval.online.evaluators` from settings, or +["toxicity"]+
  #   when settings are not loaded or the key is unset
  def configured_evaluators
    return %w[toxicity] unless defined?(Legion::Settings)

    Legion::Settings.dig(:eval, :online, :evaluators) || %w[toxicity]
  end

  # @return [Object] `eval.online.sample_rate` from settings, or 1.0 when
  #   settings are not loaded or the key is unset
  def configured_sample_rate
    return 1.0 unless defined?(Legion::Settings)

    Legion::Settings.dig(:eval, :online, :sample_rate) || 1.0
  end
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
@@ -10,6 +10,7 @@ require_relative 'eval/helpers/guardrails'
|
|
|
10
10
|
require_relative 'eval/runners/evaluation'
|
|
11
11
|
require_relative 'eval/runners/annotation'
|
|
12
12
|
require_relative 'eval/runners/agentic_review'
|
|
13
|
+
require_relative 'eval/runners/online'
|
|
13
14
|
require_relative 'eval/client'
|
|
14
15
|
|
|
15
16
|
module Legion
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lex-eval
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.1
|
|
4
|
+
version: 0.2.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Matthew Iverson
|
|
@@ -19,6 +19,7 @@ extra_rdoc_files: []
|
|
|
19
19
|
files:
|
|
20
20
|
- README.md
|
|
21
21
|
- lib/legion/extensions/eval.rb
|
|
22
|
+
- lib/legion/extensions/eval/actors/online.rb
|
|
22
23
|
- lib/legion/extensions/eval/client.rb
|
|
23
24
|
- lib/legion/extensions/eval/evaluators/base.rb
|
|
24
25
|
- lib/legion/extensions/eval/evaluators/code_evaluator.rb
|
|
@@ -32,6 +33,7 @@ files:
|
|
|
32
33
|
- lib/legion/extensions/eval/runners/agentic_review.rb
|
|
33
34
|
- lib/legion/extensions/eval/runners/annotation.rb
|
|
34
35
|
- lib/legion/extensions/eval/runners/evaluation.rb
|
|
36
|
+
- lib/legion/extensions/eval/runners/online.rb
|
|
35
37
|
- lib/legion/extensions/eval/templates/code_generation.yml
|
|
36
38
|
- lib/legion/extensions/eval/templates/code_readability.yml
|
|
37
39
|
- lib/legion/extensions/eval/templates/faithfulness.yml
|